#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine numbers of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine numbers of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
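
/*
  A hedged usage sketch for the routine above (gseq and mloc are illustrative
  names, not defined in this file): rank 0 holds a fully assembled SeqAIJ
  matrix, every rank passes its desired number of local rows, and the reuse
  path only ships fresh numerical values into an unchanged nonzero pattern.

     Mat gdist;
     ierr = MatDistribute_MPIAIJ(comm,gseq,mloc,MAT_INITIAL_MATRIX,&gdist);CHKERRQ(ierr);
     ... update the values (not the pattern) of gseq on rank 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gseq,mloc,MAT_REUSE_MATRIX,&gdist);CHKERRQ(ierr);
*/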

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
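
/*
  A small worked example of the mapping built above (values illustrative):
  if this process couples to global columns {3,7,12} through its off-diagonal
  block, then

     garray = {3, 7, 12}                  local -> global
     colmap: 3 -> 1, 7 -> 2, 12 -> 3      global -> local + 1

  The stored values are shifted by one so that 0 (a zeroed array slot or a
  missing table entry) means "global column not present locally", which is
  why every lookup of colmap below is followed by subtracting 1.
*/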

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
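
/*
  Both macros above use the same insertion technique: shrink [low,high) with a
  few binary-search bisections, finish with a linear scan, then either update
  the matching entry or shift the tail of the row up one slot and insert. A
  freestanding sketch of that pattern follows (illustrative only, compiled out;
  it assumes the row still has a spare slot, whereas the macros grow storage
  through MatSeqXAIJReallocateAIJ and also honor ignorezeroentries/nonew).
*/
#if 0
static void SortedRowInsert_Sketch(PetscInt *cols,MatScalar *vals,PetscInt *nrow,PetscInt col,MatScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;
  while (high - low > 5) {           /* bisect down to a short range */
    t = (low + high)/2;
    if (cols[t] > col) high = t; else low = t;
  }
  for (i=low; i<high; i++) {         /* linear scan of the short range */
    if (cols[i] > col) break;
    if (cols[i] == col) {            /* existing entry: update in place */
      if (addv == ADD_VALUES) vals[i] += value; else vals[i] = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {    /* new entry: shift tail up by one */
    cols[ii+1] = cols[ii]; vals[ii+1] = vals[ii];
  }
  cols[i] = col; vals[i] = value; (*nrow)++;
}
#endif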

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
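
/*
  The three PetscMemcpy() calls above assume the caller's array v lists one
  local row in ascending global column order; as an illustrative picture,

     v = [ B entries left of the diagonal block | all A (diagonal block) entries | B entries right of it ]

  so the first l values go to B, the next a->i[row+1]-a->i[row] values go to A,
  and the remaining b->i[row+1]-b->i[row]-l values return to B.
*/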

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
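
/*
  A hedged calling sketch (grow, gcols, vals, nc are illustrative names):
  locally owned rows are folded straight into A or B above, while any other
  row lands in mat->stash and only reaches its owner during assembly, so the
  canonical pattern from any process is

     ierr = MatSetValues(mat,1,&grow,nc,gcols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/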

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
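
/*
  Why the MPI_PROD reduction above detects disassembly, by a worked example:
  each rank contributes was_assembled as 0 or 1, so contributions {1,1,0,1}
  give the product 0 on every rank; a single disassembled process therefore
  forces all still-assembled processes through DisAssemble_MPIAIJ(), which is
  the collective agreement needed before B's columns are renumbered.
*/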

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscBool      found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory with many mallocs and slow things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
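
/*
  Hedged usage sketch for the routine above: rows are global indices and,
  unless MAT_NO_OFF_PROC_ZERO_ROWS is set, need not be locally owned:

     PetscInt rows[] = {0,5,9};                         three illustrative global rows
     ierr = MatZeroRows(A,3,rows,1.0);CHKERRQ(ierr);    zero them, put 1.0 on the diagonal

  Passing diag = 0.0 skips the diagonal-insertion path entirely.
*/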

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
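
/*
  The multiply above realizes the standard MPIAIJ splitting, sketched as

     y = A_diag * x_local + B_offdiag * x_ghost        (x_ghost lives in a->lvec)

  with the scatter split into Begin/End so the diagonal-block product
  overlaps the communication that fills a->lvec.
*/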

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
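
/*
  For the transposed products above the data flow reverses: B^T x is computed
  into a->lvec and the SCATTER_REVERSE/ADD_VALUES scatter accumulates those
  pieces into the owning processes, i.e. conceptually

     y = A_diag^T * x_local + (contributions of B_offdiag^T * x received from neighbors)

  which is why ADD_VALUES, not INSERT_VALUES, is required on the way back.
*/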

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  if (aij->A) {ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
  if (aij->B) {ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th process needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
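
/*
  The file written above has the layout (sketched for a hypothetical 2x2
  diagonal matrix with entries 1.0 and 2.0):

     header:          MAT_FILE_CLASSID, 2, 2, 2      classid, M, N, global nz
     row lengths:     1, 1
     column indices:  0, 1                           rows in order, columns ascending
     values:          1.0, 2.0

  The interleaving of the B part around the A part in the loops above is what
  keeps each row's column indices globally sorted.
*/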

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
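
/*
  Each outer iteration above localizes SOR by freezing the off-process part
  of x; written out as a sketch, the update is

     bb1 = bb - B * x_ghost        (lvec scaled by -1, then B*lvec + bb)
     x   = SOR(A_diag, bb1)        (purely local sweep with omega, fshift, lits)

  so only the diagonal block is relaxed and inter-process coupling enters
  through a right-hand side refreshed once per iteration.
*/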

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
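
/*
  A note on the index-set plumbing above, as a hedged example: a distributed
  row permutation rowp is first gathered so every process sees the whole
  permutation (ISAllGather), inverted globally (ISInvertPermutation), and each
  process then keeps only the slice [first,first+local_size) of the inverse as
  lrowp. The column permutation skips the gather because each process already
  stores global column indices for all of its rows, so a purely local inverse
  suffices before MatGetSubMatrix_MPIAIJ_Private() extracts the result.
*/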
info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1414 info->fill_ratio_needed = 0; 1415 info->factor_mallocs = 0; 1416 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatSetOption_MPIAIJ" 1422 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1423 { 1424 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1425 PetscErrorCode ierr; 1426 1427 PetscFunctionBegin; 1428 switch (op) { 1429 case MAT_NEW_NONZERO_LOCATIONS: 1430 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1431 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1432 case MAT_KEEP_NONZERO_PATTERN: 1433 case MAT_NEW_NONZERO_LOCATION_ERR: 1434 case MAT_USE_INODES: 1435 case MAT_IGNORE_ZERO_ENTRIES: 1436 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1437 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1438 break; 1439 case MAT_ROW_ORIENTED: 1440 a->roworiented = flg; 1441 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1442 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1443 break; 1444 case MAT_NEW_DIAGONALS: 1445 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1446 break; 1447 case MAT_IGNORE_OFF_PROC_ENTRIES: 1448 a->donotstash = PETSC_TRUE; 1449 break; 1450 case MAT_SPD: 1451 A->spd_set = PETSC_TRUE; 1452 A->spd = flg; 1453 if (flg) { 1454 A->symmetric = PETSC_TRUE; 1455 A->structurally_symmetric = PETSC_TRUE; 1456 A->symmetric_set = PETSC_TRUE; 1457 A->structurally_symmetric_set = PETSC_TRUE; 1458 } 1459 break; 1460 case MAT_SYMMETRIC: 1461 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1462 break; 1463 case MAT_STRUCTURALLY_SYMMETRIC: 1464 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1465 break; 1466 case MAT_HERMITIAN: 1467 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1468 break; 1469 case MAT_SYMMETRY_ETERNAL: 1470 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1471 break; 1472 default: 1473 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1474 } 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatGetRow_MPIAIJ" 1480 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1481 { 1482 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1483 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1484 PetscErrorCode ierr; 1485 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1486 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1487 PetscInt *cmap,*idx_p; 1488 1489 PetscFunctionBegin; 1490 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1491 mat->getrowactive = PETSC_TRUE; 1492 1493 if (!mat->rowvalues && (idx || v)) { 1494 /* 1495 allocate enough space to hold information from the longest row. 
1496 */ 1497 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1498 PetscInt max = 1,tmp; 1499 for (i=0; i<matin->rmap->n; i++) { 1500 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1501 if (max < tmp) { max = tmp; } 1502 } 1503 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1504 } 1505 1506 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1507 lrow = row - rstart; 1508 1509 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1510 if (!v) {pvA = 0; pvB = 0;} 1511 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1512 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1513 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1514 nztot = nzA + nzB; 1515 1516 cmap = mat->garray; 1517 if (v || idx) { 1518 if (nztot) { 1519 /* Sort by increasing column numbers, assuming A and B already sorted */ 1520 PetscInt imark = -1; 1521 if (v) { 1522 *v = v_p = mat->rowvalues; 1523 for (i=0; i<nzB; i++) { 1524 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1525 else break; 1526 } 1527 imark = i; 1528 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1529 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1530 } 1531 if (idx) { 1532 *idx = idx_p = mat->rowindices; 1533 if (imark > -1) { 1534 for (i=0; i<imark; i++) { 1535 idx_p[i] = cmap[cworkB[i]]; 1536 } 1537 } else { 1538 for (i=0; i<nzB; i++) { 1539 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1540 else break; 1541 } 1542 imark = i; 1543 } 1544 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1545 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1546 } 1547 } else { 1548 if (idx) *idx = 0; 1549 if (v) *v = 0; 1550 } 1551 } 1552 *nz = nztot; 1553 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1554 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1560 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1561 { 1562 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1563 1564 PetscFunctionBegin; 1565 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1566 aij->getrowactive = PETSC_FALSE; 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatNorm_MPIAIJ" 1572 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1573 { 1574 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1575 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1576 PetscErrorCode ierr; 1577 PetscInt i,j,cstart = mat->cmap->rstart; 1578 PetscReal sum = 0.0; 1579 MatScalar *v; 1580 1581 PetscFunctionBegin; 1582 if (aij->size == 1) { 1583 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1584 } else { 1585 if (type == NORM_FROBENIUS) { 1586 v = amat->a; 1587 for (i=0; i<amat->nz; i++) { 1588 #if defined(PETSC_USE_COMPLEX) 1589 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1590 #else 1591 sum += (*v)*(*v); v++; 1592 #endif 1593 } 1594 v = bmat->a; 1595 for (i=0; i<bmat->nz; i++) { 1596 #if defined(PETSC_USE_COMPLEX) 1597 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1598 #else 1599 sum += (*v)*(*v); v++; 1600 #endif 1601 } 1602 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1603 *norm = sqrt(*norm); 1604 } else if (type 
== NORM_1) { /* max column norm */ 1605 PetscReal *tmp,*tmp2; 1606 PetscInt *jj,*garray = aij->garray; 1607 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1608 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1609 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1610 *norm = 0.0; 1611 v = amat->a; jj = amat->j; 1612 for (j=0; j<amat->nz; j++) { 1613 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1614 } 1615 v = bmat->a; jj = bmat->j; 1616 for (j=0; j<bmat->nz; j++) { 1617 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1618 } 1619 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1620 for (j=0; j<mat->cmap->N; j++) { 1621 if (tmp2[j] > *norm) *norm = tmp2[j]; 1622 } 1623 ierr = PetscFree(tmp);CHKERRQ(ierr); 1624 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1625 } else if (type == NORM_INFINITY) { /* max row norm */ 1626 PetscReal ntemp = 0.0; 1627 for (j=0; j<aij->A->rmap->n; j++) { 1628 v = amat->a + amat->i[j]; 1629 sum = 0.0; 1630 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1631 sum += PetscAbsScalar(*v); v++; 1632 } 1633 v = bmat->a + bmat->i[j]; 1634 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1635 sum += PetscAbsScalar(*v); v++; 1636 } 1637 if (sum > ntemp) ntemp = sum; 1638 } 1639 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1640 } else { 1641 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for the two norm"); 1642 } 1643 } 1644 PetscFunctionReturn(0); 1645 } 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "MatTranspose_MPIAIJ" 1649 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1650 { 1651 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1652 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1653 PetscErrorCode ierr; 1654 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1655 PetscInt cstart=A->cmap->rstart,ncol; 1656 Mat B; 1657 MatScalar *array; 1658 1659 PetscFunctionBegin; 1660 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1661 1662 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1663 ai = Aloc->i; aj = Aloc->j; 1664 bi = Bloc->i; bj = Bloc->j; 1665 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1666 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1667 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1668 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1669 for (i=0; i<ai[ma]; i++){ 1670 d_nnz[aj[i]] ++; 1671 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1672 } 1673 1674 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1675 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1676 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1677 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1678 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1679 } else { 1680 B = *matout; 1681 } 1682 1683 /* copy over the A part */ 1684 array = Aloc->a; 1685 row = A->rmap->rstart; 1686 for (i=0; i<ma; i++) { 1687 ncol = ai[i+1]-ai[i]; 1688 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1689 row++; array += ncol; aj += ncol; 1690 } 1691 aj = Aloc->j; 1692 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */ 1693 1694 /* copy over the B part */ 1695 ierr =
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1696 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1697 array = Bloc->a; 1698 row = A->rmap->rstart; 1699 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1700 cols_tmp = cols; 1701 for (i=0; i<mb; i++) { 1702 ncol = bi[i+1]-bi[i]; 1703 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1704 row++; array += ncol; cols_tmp += ncol; 1705 } 1706 ierr = PetscFree(cols);CHKERRQ(ierr); 1707 1708 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1709 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1710 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1711 *matout = B; 1712 } else { 1713 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1714 } 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1720 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1721 { 1722 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1723 Mat a = aij->A,b = aij->B; 1724 PetscErrorCode ierr; 1725 PetscInt s1,s2,s3; 1726 1727 PetscFunctionBegin; 1728 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1729 if (rr) { 1730 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1731 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1732 /* Overlap communication with computation. */ 1733 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1734 } 1735 if (ll) { 1736 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1737 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1738 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1739 } 1740 /* scale the diagonal block */ 1741 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1742 1743 if (rr) { 1744 /* Do a scatter end and then right scale the off-diagonal block */ 1745 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1746 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1747 } 1748 1749 PetscFunctionReturn(0); 1750 } 1751 1752 #undef __FUNCT__ 1753 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1754 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1755 { 1756 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1757 PetscErrorCode ierr; 1758 1759 PetscFunctionBegin; 1760 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1761 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1762 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1763 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1764 PetscFunctionReturn(0); 1765 } 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1768 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1769 { 1770 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1771 PetscErrorCode ierr; 1772 1773 PetscFunctionBegin; 1774 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatEqual_MPIAIJ" 1780 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 1781 { 1782 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1783 Mat a,b,c,d; 1784 PetscBool flg; 1785 PetscErrorCode ierr; 1786 1787 PetscFunctionBegin; 1788 a = matA->A; b = matA->B; 1789 c = matB->A; d = matB->B; 1790 1791 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1792 if (flg) { 1793 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1794 } 1795 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1796 
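/* the diagonal (A) and off-diagonal (B) blocks were compared locally above; the
   MPI_LAND reduction guarantees all processes return the same answer even if
   only some of them detect a difference */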
PetscFunctionReturn(0); 1797 } 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "MatCopy_MPIAIJ" 1801 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1802 { 1803 PetscErrorCode ierr; 1804 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1805 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1806 1807 PetscFunctionBegin; 1808 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1809 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1810 /* because of the column compression in the off-processor part of the matrix a->B, 1811 the number of columns in a->B and b->B may be different, hence we cannot call 1812 the MatCopy() directly on the two parts. If need be, we can provide a more 1813 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1814 then copying the submatrices */ 1815 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1816 } else { 1817 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1818 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1819 } 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1825 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1826 { 1827 PetscErrorCode ierr; 1828 1829 PetscFunctionBegin; 1830 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatAXPY_MPIAIJ" 1836 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1837 { 1838 PetscErrorCode ierr; 1839 PetscInt i; 1840 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1841 PetscBLASInt bnz,one=1; 1842 Mat_SeqAIJ *x,*y; 1843 1844 PetscFunctionBegin; 1845 if (str == SAME_NONZERO_PATTERN) { 1846 PetscScalar alpha = a; 1847 x = (Mat_SeqAIJ *)xx->A->data; 1848 y = (Mat_SeqAIJ *)yy->A->data; 1849 bnz = PetscBLASIntCast(x->nz); 1850 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1851 x = (Mat_SeqAIJ *)xx->B->data; 1852 y = (Mat_SeqAIJ *)yy->B->data; 1853 bnz = PetscBLASIntCast(x->nz); 1854 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1855 } else if (str == SUBSET_NONZERO_PATTERN) { 1856 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1857 1858 x = (Mat_SeqAIJ *)xx->B->data; 1859 y = (Mat_SeqAIJ *)yy->B->data; 1860 if (y->xtoy && y->XtoY != xx->B) { 1861 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1862 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1863 } 1864 if (!y->xtoy) { /* get xtoy */ 1865 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1866 y->XtoY = xx->B; 1867 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1868 } 1869 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1870 } else { 1871 Mat B; 1872 PetscInt *nnz_d,*nnz_o; 1873 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 1874 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 1875 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 1876 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1877 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1878 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 1879 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1880 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 1881 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 1882 ierr = 
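/* patterns differ: assemble a*X + Y entry by entry into the freshly preallocated B, then let B take over Y's header */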
MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1883 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1884 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1885 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1886 } 1887 PetscFunctionReturn(0); 1888 } 1889 1890 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatConjugate_MPIAIJ" 1894 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1895 { 1896 #if defined(PETSC_USE_COMPLEX) 1897 PetscErrorCode ierr; 1898 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1899 1900 PetscFunctionBegin; 1901 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1902 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1903 #else 1904 PetscFunctionBegin; 1905 #endif 1906 PetscFunctionReturn(0); 1907 } 1908 1909 #undef __FUNCT__ 1910 #define __FUNCT__ "MatRealPart_MPIAIJ" 1911 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1912 { 1913 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1914 PetscErrorCode ierr; 1915 1916 PetscFunctionBegin; 1917 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1918 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1924 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1925 { 1926 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1927 PetscErrorCode ierr; 1928 1929 PetscFunctionBegin; 1930 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1931 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #ifdef PETSC_HAVE_PBGL 1936 1937 #include <boost/parallel/mpi/bsp_process_group.hpp> 1938 #include <boost/graph/distributed/ilu_default_graph.hpp> 1939 #include <boost/graph/distributed/ilu_0_block.hpp> 1940 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1941 #include <boost/graph/distributed/petsc/interface.hpp> 1942 #include <boost/multi_array.hpp> 1943 #include <boost/parallel/distributed_property_map.hpp> 1944 1945 #undef __FUNCT__ 1946 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1947 /* 1948 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1949 */ 1950 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1951 { 1952 namespace petsc = boost::distributed::petsc; 1953 1954 namespace graph_dist = boost::graph::distributed; 1955 using boost::graph::distributed::ilu_default::process_group_type; 1956 using boost::graph::ilu_permuted; 1957 1958 PetscBool row_identity, col_identity; 1959 PetscContainer c; 1960 PetscInt m, n, M, N; 1961 PetscErrorCode ierr; 1962 1963 PetscFunctionBegin; 1964 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1965 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1966 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1967 if (!row_identity || !col_identity) { 1968 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1969 } 1970 1971 process_group_type pg; 1972 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1973 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1974 lgraph_type& level_graph = *lgraph_p; 1975 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1976 1977 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1978 ilu_permuted(level_graph); 1979 1980 /* put together the new matrix */ 1981 ierr = MatCreate(((PetscObject)A)->comm,
fact);CHKERRQ(ierr); 1982 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1983 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1984 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1985 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1986 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1987 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 1989 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1990 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1991 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1992 PetscFunctionReturn(0); 1993 } 1994 1995 #undef __FUNCT__ 1996 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1997 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1998 { 1999 PetscFunctionBegin; 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatSolve_MPIAIJ" 2005 /* 2006 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2007 */ 2008 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2009 { 2010 namespace graph_dist = boost::graph::distributed; 2011 2012 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2013 lgraph_type* lgraph_p; 2014 PetscContainer c; 2015 PetscErrorCode ierr; 2016 2017 PetscFunctionBegin; 2018 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2019 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2020 ierr = VecCopy(b, x);CHKERRQ(ierr); 2021 2022 PetscScalar* array_x; 2023 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2024 PetscInt sx; 2025 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2026 2027 PetscScalar* array_b; 2028 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2029 PetscInt sb; 2030 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2031 2032 lgraph_type& level_graph = *lgraph_p; 2033 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2034 2035 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2036 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2037 ref_x(array_x, boost::extents[num_vertices(graph)]); 2038 2039 typedef boost::iterator_property_map<array_ref_type::iterator, 2040 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2041 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2042 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2043 2044 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2045 ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); 2046 PetscFunctionReturn(0); 2047 } 2048 #endif 2049 2050 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2051 PetscInt nzlocal,nsends,nrecvs; 2052 PetscMPIInt *send_rank,*recv_rank; 2053 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2054 PetscScalar *sbuf_a,**rbuf_a; 2055 PetscErrorCode (*MatDestroy)(Mat); 2056 } Mat_Redundant; 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2060 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2061 { 2062 PetscErrorCode ierr; 2063 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2064 PetscInt i; 2065 2066 PetscFunctionBegin; 2067 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2068 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2069 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2070 for (i=0; i<redund->nrecvs; i++){ 2071 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2072 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2073 } 2074 ierr =
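/* the four arrays freed below were obtained from a single PetscMalloc4() call, so they must be released together with PetscFree4() */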
PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2075 ierr = PetscFree(redund);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatDestroy_MatRedundant" 2081 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2082 { 2083 PetscErrorCode ierr; 2084 PetscContainer container; 2085 Mat_Redundant *redund=PETSC_NULL; 2086 2087 PetscFunctionBegin; 2088 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2089 if (container) { 2090 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2091 } else { 2092 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2093 } 2094 A->ops->destroy = redund->MatDestroy; 2095 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2096 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2097 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2098 PetscFunctionReturn(0); 2099 } 2100 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2103 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2104 { 2105 PetscMPIInt rank,size; 2106 MPI_Comm comm=((PetscObject)mat)->comm; 2107 PetscErrorCode ierr; 2108 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2109 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2110 PetscInt *rowrange=mat->rmap->range; 2111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2112 Mat A=aij->A,B=aij->B,C=*matredundant; 2113 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2114 PetscScalar *sbuf_a; 2115 PetscInt nzlocal=a->nz+b->nz; 2116 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2117 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2118 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2119 MatScalar *aworkA,*aworkB; 2120 PetscScalar *vals; 2121 PetscMPIInt tag1,tag2,tag3,imdex; 2122 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2123 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2124 MPI_Status recv_status,*send_status; 2125 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2126 PetscInt **rbuf_j=PETSC_NULL; 2127 PetscScalar **rbuf_a=PETSC_NULL; 2128 Mat_Redundant *redund=PETSC_NULL; 2129 PetscContainer container; 2130 2131 PetscFunctionBegin; 2132 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2133 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2134 2135 if (reuse == MAT_REUSE_MATRIX) { 2136 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2137 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2138 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2139 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2140 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2141 if (container) { 2142 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2143 } else { 2144 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2145 } 2146 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2147 2148 nsends = redund->nsends; 2149 nrecvs = redund->nrecvs; 2150 send_rank = redund->send_rank; 2151 recv_rank = redund->recv_rank; 2152 sbuf_nz = redund->sbuf_nz; 2153 rbuf_nz = redund->rbuf_nz; 2154 sbuf_j = redund->sbuf_j; 2155 sbuf_a = redund->sbuf_a; 2156 rbuf_j = redund->rbuf_j; 2157 rbuf_a = redund->rbuf_a; 2158 } 2159 2160 if (reuse == MAT_INITIAL_MATRIX){ 2161 PetscMPIInt subrank,subsize; 2162 PetscInt nleftover,np_subcomm; 2163 /* get the destination processors' id send_rank, nsends and nrecvs */ 2164 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2165 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2166 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2167 np_subcomm = size/nsubcomm; 2168 nleftover = size - nsubcomm*np_subcomm; 2169 nsends = 0; nrecvs = 0; 2170 for (i=0; i<size; i++){ /* i=rank*/ 2171 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2172 send_rank[nsends] = i; nsends++; 2173 recv_rank[nrecvs++] = i; 2174 } 2175 } 2176 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2177 i = size-nleftover-1; 2178 j = 0; 2179 while (j < nsubcomm - nleftover){ 2180 send_rank[nsends++] = i; 2181 i--; j++; 2182 } 2183 } 2184 2185 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2186 for (i=0; i<nleftover; i++){ 2187 recv_rank[nrecvs++] = size-nleftover+i; 2188 } 2189 } 2190 2191 /* allocate sbuf_j, sbuf_a */ 2192 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2193 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2194 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2195 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2196 2197 /* copy mat's local entries into the buffers */ 2198 if (reuse == MAT_INITIAL_MATRIX){ 2199 rownz_max = 0; 2200 rptr = sbuf_j; 2201 cols = sbuf_j + rend-rstart + 1; 2202 vals = sbuf_a; 2203 rptr[0] = 0; 2204 for (i=0; i<rend-rstart; i++){ 2205 row = i + rstart; 2206 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2207 ncols = nzA + nzB; 2208 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2209 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2210 /* load the column indices for this row into cols */ 2211 lwrite = 0; 2212 for (l=0; l<nzB; l++) { 2213 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2214 vals[lwrite] = aworkB[l]; 2215 cols[lwrite++] = ctmp; 2216 } 2217 } 2218 for (l=0; l<nzA; l++){ 2219 vals[lwrite] = aworkA[l]; 2220 cols[lwrite++] = cstart + cworkA[l]; 2221 } 2222 for (l=0; l<nzB; l++) { 2223 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2224 vals[lwrite] = aworkB[l]; 2225 cols[lwrite++] = ctmp; 2226 } 2227 } 2228 vals += ncols; 2229 cols += ncols; 2230 rptr[i+1] = rptr[i] + ncols; 2231 if (rownz_max < ncols) rownz_max = ncols; 2232 } 2233 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2234 } else { /* only copy matrix values into sbuf_a */ 2235 rptr = sbuf_j; 2236 vals = sbuf_a; 2237 rptr[0] = 0; 2238 for (i=0; i<rend-rstart; i++){ 2239 row = i + rstart; 2240 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2241 ncols = nzA + nzB; 2242 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2243 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2244 lwrite = 0; 2245 for (l=0; l<nzB; l++) { 2246 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2247 } 2248 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2249 for 
(l=0; l<nzB; l++) { 2250 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2251 } 2252 vals += ncols; 2253 rptr[i+1] = rptr[i] + ncols; 2254 } 2255 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2256 2257 /* send nzlocal to others, and receive others' nzlocal */ 2258 /*--------------------------------------------------*/ 2259 if (reuse == MAT_INITIAL_MATRIX){ 2260 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2261 s_waits2 = s_waits3 + nsends; 2262 s_waits1 = s_waits2 + nsends; 2263 r_waits1 = s_waits1 + nsends; 2264 r_waits2 = r_waits1 + nrecvs; 2265 r_waits3 = r_waits2 + nrecvs; 2266 } else { 2267 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2268 r_waits3 = s_waits3 + nsends; 2269 } 2270 2271 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2272 if (reuse == MAT_INITIAL_MATRIX){ 2273 /* get new tags to keep the communication clean */ 2274 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2275 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2276 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2277 2278 /* post receives of others' nzlocal */ 2279 for (i=0; i<nrecvs; i++){ 2280 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2281 } 2282 /* send nzlocal to others */ 2283 for (i=0; i<nsends; i++){ 2284 sbuf_nz[i] = nzlocal; 2285 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2286 } 2287 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2291 recv_rank[imdex] = recv_status.MPI_SOURCE; 2292 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2293 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2294 2295 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2296 rbuf_nz[imdex] += i + 2; 2297 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2298 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2299 count--; 2300 } 2301 /* wait on sends of nzlocal */ 2302 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2303 /* send mat->i,j to others, and receive from others */ 2304 /*------------------------------------------------*/ 2305 for (i=0; i<nsends; i++){ 2306 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2307 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2308 } 2309 /* wait on receives of mat->i,j */ 2310 /*------------------------------*/ 2311 count = nrecvs; 2312 while (count) { 2313 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2314 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2315 count--; 2316 } 2317 /* wait on sends of mat->i,j */ 2318 /*---------------------------*/ 2319 if (nsends) { 2320 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2321 } 2322 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2323 2324 /* post receives, send and receive mat->a */ 2325
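/*
   Sketch of the message protocol used by this routine (summarizing the code
   above and below): tag1 carries each sender's nonzero count nzlocal, tag2
   carries the row pointers followed by the column indices (sbuf_j), and tag3
   carries only the numerical values (sbuf_a). A MAT_REUSE_MATRIX call
   therefore skips the tag1/tag2 phases and resends just the values.
*/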
/*----------------------------------------*/ 2326 for (imdex=0; imdex<nrecvs; imdex++) { 2327 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<nsends; i++){ 2330 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2331 } 2332 count = nrecvs; 2333 while (count) { 2334 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2335 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2336 count--; 2337 } 2338 if (nsends) { 2339 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2340 } 2341 2342 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2343 2344 /* create redundant matrix */ 2345 /*-------------------------*/ 2346 if (reuse == MAT_INITIAL_MATRIX){ 2347 /* compute rownz_max for preallocation */ 2348 for (imdex=0; imdex<nrecvs; imdex++){ 2349 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2350 rptr = rbuf_j[imdex]; 2351 for (i=0; i<j; i++){ 2352 ncols = rptr[i+1] - rptr[i]; 2353 if (rownz_max < ncols) rownz_max = ncols; 2354 } 2355 } 2356 2357 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2358 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2359 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2360 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2361 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2362 } else { 2363 C = *matredundant; 2364 } 2365 2366 /* insert local matrix entries */ 2367 rptr = sbuf_j; 2368 cols = sbuf_j + rend-rstart + 1; 2369 vals = sbuf_a; 2370 for (i=0; i<rend-rstart; i++){ 2371 row = i + rstart; 2372 ncols = rptr[i+1] - rptr[i]; 2373 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2374 vals += ncols; 2375 cols += ncols; 2376 } 2377 /* insert received matrix entries */ 2378 for (imdex=0; imdex<nrecvs; imdex++){ 2379 rstart = rowrange[recv_rank[imdex]]; 2380 rend = rowrange[recv_rank[imdex]+1]; 2381 rptr = rbuf_j[imdex]; 2382 cols = rbuf_j[imdex] + rend-rstart + 1; 2383 vals = rbuf_a[imdex]; 2384 for (i=0; i<rend-rstart; i++){ 2385 row = i + rstart; 2386 ncols = rptr[i+1] - rptr[i]; 2387 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2388 vals += ncols; 2389 cols += ncols; 2390 } 2391 } 2392 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2394 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2395 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2396 if (reuse == MAT_INITIAL_MATRIX){ 2397 PetscContainer container; 2398 *matredundant = C; 2399 /* create a supporting struct and attach it to C for reuse */ 2400 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2401 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2402 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2403 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2404 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2405 2406 redund->nzlocal = nzlocal; 2407 redund->nsends = nsends; 2408 redund->nrecvs = nrecvs; 2409 redund->send_rank = send_rank; 2410 redund->recv_rank = recv_rank; 
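/* the remaining assignments cache the communication pattern and buffers so a later MAT_REUSE_MATRIX call resends only the numerical values */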
2411 redund->sbuf_nz = sbuf_nz; 2412 redund->rbuf_nz = rbuf_nz; 2413 redund->sbuf_j = sbuf_j; 2414 redund->sbuf_a = sbuf_a; 2415 redund->rbuf_j = rbuf_j; 2416 redund->rbuf_a = rbuf_a; 2417 2418 redund->MatDestroy = C->ops->destroy; 2419 C->ops->destroy = MatDestroy_MatRedundant; 2420 } 2421 PetscFunctionReturn(0); 2422 } 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2426 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2427 { 2428 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2429 PetscErrorCode ierr; 2430 PetscInt i,*idxb = 0; 2431 PetscScalar *va,*vb; 2432 Vec vtmp; 2433 2434 PetscFunctionBegin; 2435 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2436 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2437 if (idx) { 2438 for (i=0; i<A->rmap->n; i++) { 2439 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2440 } 2441 } 2442 2443 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2444 if (idx) { 2445 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2446 } 2447 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2448 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2449 2450 for (i=0; i<A->rmap->n; i++){ 2451 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2452 va[i] = vb[i]; 2453 if (idx) idx[i] = a->garray[idxb[i]]; 2454 } 2455 } 2456 2457 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2458 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2459 if (idxb) { 2460 ierr = PetscFree(idxb);CHKERRQ(ierr); 2461 } 2462 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 #undef __FUNCT__ 2467 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2468 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2469 { 2470 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2471 PetscErrorCode ierr; 2472 PetscInt i,*idxb = 0; 2473 PetscScalar *va,*vb; 2474 Vec vtmp; 2475 2476 PetscFunctionBegin; 2477 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2478 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2479 if (idx) { 2480 for (i=0; i<A->rmap->n; i++) { 2481 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2482 } 2483 } 2484 2485 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2486 if (idx) { 2487 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2488 } 2489 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2490 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2491 2492 for (i=0; i<A->rmap->n; i++){ 2493 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2494 va[i] = vb[i]; 2495 if (idx) idx[i] = a->garray[idxb[i]]; 2496 } 2497 } 2498 2499 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2500 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2501 if (idxb) { 2502 ierr = PetscFree(idxb);CHKERRQ(ierr); 2503 } 2504 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2505 PetscFunctionReturn(0); 2506 } 2507 2508 #undef __FUNCT__ 2509 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2510 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2511 { 2512 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2513 PetscInt n = A->rmap->n; 2514 PetscInt cstart = A->cmap->rstart; 2515 PetscInt *cmap = mat->garray; 2516 PetscInt *diagIdx, *offdiagIdx; 2517 Vec diagV, offdiagV; 2518 PetscScalar *a, *diagA, *offdiagA; 2519 PetscInt r; 2520 PetscErrorCode ierr; 2521 2522 PetscFunctionBegin; 2523 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2524 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2525 ierr = VecCreateSeq(PETSC_COMM_SELF, n,
&offdiagV);CHKERRQ(ierr); 2526 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2527 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2528 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2529 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2530 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2531 for (r = 0; r < n; ++r) { 2532 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2533 a[r] = diagA[r]; 2534 idx[r] = cstart + diagIdx[r]; 2535 } else { 2536 a[r] = offdiagA[r]; 2537 idx[r] = cmap[offdiagIdx[r]]; 2538 } 2539 } 2540 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2541 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2542 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2543 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2544 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2545 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 #undef __FUNCT__ 2550 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2551 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2552 { 2553 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2554 PetscInt n = A->rmap->n; 2555 PetscInt cstart = A->cmap->rstart; 2556 PetscInt *cmap = mat->garray; 2557 PetscInt *diagIdx, *offdiagIdx; 2558 Vec diagV, offdiagV; 2559 PetscScalar *a, *diagA, *offdiagA; 2560 PetscInt r; 2561 PetscErrorCode ierr; 2562 2563 PetscFunctionBegin; 2564 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2565 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2566 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2567 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2568 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2569 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2570 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2571 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2572 for (r = 0; r < n; ++r) { 2573 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2574 a[r] = diagA[r]; 2575 idx[r] = cstart + diagIdx[r]; 2576 } else { 2577 a[r] = offdiagA[r]; 2578 idx[r] = cmap[offdiagIdx[r]]; 2579 } 2580 } 2581 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2582 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2583 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2584 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2585 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2586 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2592 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2593 { 2594 PetscErrorCode ierr; 2595 Mat *dummy; 2596 2597 PetscFunctionBegin; 2598 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2599 *newmat = *dummy; 2600 ierr = PetscFree(dummy);CHKERRQ(ierr); 2601 PetscFunctionReturn(0); 2602 } 2603 2604 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2605 /* -------------------------------------------------------------------*/ 2606 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2607 MatGetRow_MPIAIJ, 2608 MatRestoreRow_MPIAIJ, 2609 MatMult_MPIAIJ, 2610 /* 4*/ MatMultAdd_MPIAIJ, 2611 MatMultTranspose_MPIAIJ, 2612 MatMultTransposeAdd_MPIAIJ, 2613 #ifdef PETSC_HAVE_PBGL 2614 MatSolve_MPIAIJ, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 /*10*/ 0, 2621 0, 2622 0, 2623
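/* a 0 entry in this table means no MPIAIJ-specific implementation is registered; the corresponding MatXXX() call then errors or uses a type-independent default */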
MatSOR_MPIAIJ, 2624 MatTranspose_MPIAIJ, 2625 /*15*/ MatGetInfo_MPIAIJ, 2626 MatEqual_MPIAIJ, 2627 MatGetDiagonal_MPIAIJ, 2628 MatDiagonalScale_MPIAIJ, 2629 MatNorm_MPIAIJ, 2630 /*20*/ MatAssemblyBegin_MPIAIJ, 2631 MatAssemblyEnd_MPIAIJ, 2632 MatSetOption_MPIAIJ, 2633 MatZeroEntries_MPIAIJ, 2634 /*24*/ MatZeroRows_MPIAIJ, 2635 0, 2636 #ifdef PETSC_HAVE_PBGL 2637 0, 2638 #else 2639 0, 2640 #endif 2641 0, 2642 0, 2643 /*29*/ MatSetUpPreallocation_MPIAIJ, 2644 #ifdef PETSC_HAVE_PBGL 2645 0, 2646 #else 2647 0, 2648 #endif 2649 0, 2650 0, 2651 0, 2652 /*34*/ MatDuplicate_MPIAIJ, 2653 0, 2654 0, 2655 0, 2656 0, 2657 /*39*/ MatAXPY_MPIAIJ, 2658 MatGetSubMatrices_MPIAIJ, 2659 MatIncreaseOverlap_MPIAIJ, 2660 MatGetValues_MPIAIJ, 2661 MatCopy_MPIAIJ, 2662 /*44*/ MatGetRowMax_MPIAIJ, 2663 MatScale_MPIAIJ, 2664 0, 2665 0, 2666 0, 2667 /*49*/ MatSetBlockSize_MPIAIJ, 2668 0, 2669 0, 2670 0, 2671 0, 2672 /*54*/ MatFDColoringCreate_MPIAIJ, 2673 0, 2674 MatSetUnfactored_MPIAIJ, 2675 MatPermute_MPIAIJ, 2676 0, 2677 /*59*/ MatGetSubMatrix_MPIAIJ, 2678 MatDestroy_MPIAIJ, 2679 MatView_MPIAIJ, 2680 0, 2681 0, 2682 /*64*/ 0, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2688 MatGetRowMinAbs_MPIAIJ, 2689 0, 2690 MatSetColoring_MPIAIJ, 2691 #if defined(PETSC_HAVE_ADIC) 2692 MatSetValuesAdic_MPIAIJ, 2693 #else 2694 0, 2695 #endif 2696 MatSetValuesAdifor_MPIAIJ, 2697 /*75*/ MatFDColoringApply_AIJ, 2698 0, 2699 0, 2700 0, 2701 0, 2702 /*80*/ 0, 2703 0, 2704 0, 2705 /*83*/ MatLoad_MPIAIJ, 2706 0, 2707 0, 2708 0, 2709 0, 2710 0, 2711 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2712 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2713 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2714 MatPtAP_Basic, 2715 MatPtAPSymbolic_MPIAIJ, 2716 /*94*/ MatPtAPNumeric_MPIAIJ, 2717 0, 2718 0, 2719 0, 2720 0, 2721 /*99*/ 0, 2722 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2723 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2724 MatConjugate_MPIAIJ, 2725 0, 2726 /*104*/MatSetValuesRow_MPIAIJ, 2727 MatRealPart_MPIAIJ, 2728 MatImaginaryPart_MPIAIJ, 2729 0, 2730 0, 2731 /*109*/0, 2732 MatGetRedundantMatrix_MPIAIJ, 2733 MatGetRowMin_MPIAIJ, 2734 0, 2735 0, 2736 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2737 0, 2738 0, 2739 0, 2740 0, 2741 /*119*/0, 2742 0, 2743 0, 2744 0, 2745 MatGetMultiProcBlock_MPIAIJ 2746 }; 2747 2748 /* ----------------------------------------------------------------------------------------*/ 2749 2750 EXTERN_C_BEGIN 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2753 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2754 { 2755 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2756 PetscErrorCode ierr; 2757 2758 PetscFunctionBegin; 2759 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2760 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2761 PetscFunctionReturn(0); 2762 } 2763 EXTERN_C_END 2764 2765 EXTERN_C_BEGIN 2766 #undef __FUNCT__ 2767 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2768 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2769 { 2770 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2771 PetscErrorCode ierr; 2772 2773 PetscFunctionBegin; 2774 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2775 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2776 PetscFunctionReturn(0); 2777 } 2778 EXTERN_C_END 2779 2780 EXTERN_C_BEGIN 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2784 { 2785 Mat_MPIAIJ *b; 2786 PetscErrorCode ierr; 2787 PetscInt 
i; 2788 2789 PetscFunctionBegin; 2790 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2791 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2792 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2793 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2794 2795 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2796 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2797 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2798 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2799 if (d_nnz) { 2800 for (i=0; i<B->rmap->n; i++) { 2801 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2802 } 2803 } 2804 if (o_nnz) { 2805 for (i=0; i<B->rmap->n; i++) { 2806 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2807 } 2808 } 2809 b = (Mat_MPIAIJ*)B->data; 2810 2811 if (!B->preallocated) { 2812 /* Explicitly create 2 MATSEQAIJ matrices. */ 2813 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2814 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2815 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2816 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2817 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2818 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2819 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2820 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2821 } 2822 2823 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2824 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2825 B->preallocated = PETSC_TRUE; 2826 PetscFunctionReturn(0); 2827 } 2828 EXTERN_C_END 2829 2830 #undef __FUNCT__ 2831 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2832 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2833 { 2834 Mat mat; 2835 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2836 PetscErrorCode ierr; 2837 2838 PetscFunctionBegin; 2839 *newmat = 0; 2840 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2841 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2842 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2843 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2844 a = (Mat_MPIAIJ*)mat->data; 2845 2846 mat->factortype = matin->factortype; 2847 mat->rmap->bs = matin->rmap->bs; 2848 mat->assembled = PETSC_TRUE; 2849 mat->insertmode = NOT_SET_VALUES; 2850 mat->preallocated = PETSC_TRUE; 2851 2852 a->size = oldmat->size; 2853 a->rank = oldmat->rank; 2854 a->donotstash = oldmat->donotstash; 2855 a->roworiented = oldmat->roworiented; 2856 a->rowindices = 0; 2857 a->rowvalues = 0; 2858 a->getrowactive = PETSC_FALSE; 2859 2860 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2861 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2862 2863 if (oldmat->colmap) { 2864 #if defined (PETSC_USE_CTABLE) 2865 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2866 #else 2867 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2868 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2869 ierr = 
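/* without PETSC_USE_CTABLE the column map is a dense array of length cmap->N (global column number -> local column in B), so duplication is a flat copy */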
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2870 #endif 2871 } else a->colmap = 0; 2872 if (oldmat->garray) { 2873 PetscInt len; 2874 len = oldmat->B->cmap->n; 2875 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2876 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2877 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2878 } else a->garray = 0; 2879 2880 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2881 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2882 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2883 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2884 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2885 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2886 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2887 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2888 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2889 *newmat = mat; 2890 PetscFunctionReturn(0); 2891 } 2892 2893 /* 2894 Allows sending/receiving messages larger than 2 gigabytes in a single call 2895 */ 2896 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 2897 { 2898 int ierr; 2899 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2900 PetscInt i,numchunks; 2901 PetscMPIInt icnt; 2902 2903 numchunks = cnt/CHUNKSIZE + 1; 2904 for (i=0; i<numchunks; i++) { 2905 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2906 ierr = MPI_Send(mess,icnt,type,to,tag,comm); if (ierr) return ierr; 2907 if (type == MPIU_INT) { 2908 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2909 } else if (type == MPIU_SCALAR) { 2910 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2911 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2912 } 2913 return 0; 2914 } 2915 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 2916 { 2917 int ierr; 2918 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2919 MPI_Status status; 2920 PetscInt i,numchunks; 2921 PetscMPIInt icnt; 2922 2923 numchunks = cnt/CHUNKSIZE + 1; 2924 for (i=0; i<numchunks; i++) { 2925 icnt = PetscMPIIntCast((i < numchunks-1) ?
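/* all chunks except the last are full CHUNKSIZE messages; the last one carries the remainder */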
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2926 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); if (ierr) return ierr; 2927 if (type == MPIU_INT) { 2928 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2929 } else if (type == MPIU_SCALAR) { 2930 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2931 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2932 } 2933 return 0; 2934 } 2935 2936 #undef __FUNCT__ 2937 #define __FUNCT__ "MatLoad_MPIAIJ" 2938 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2939 { 2940 PetscScalar *vals,*svals; 2941 MPI_Comm comm = ((PetscObject)viewer)->comm; 2942 PetscErrorCode ierr; 2943 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2944 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2945 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2946 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2947 PetscInt cend,cstart,n,*rowners,sizesset=1; 2948 int fd; 2949 2950 PetscFunctionBegin; 2951 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2952 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2953 if (!rank) { 2954 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2955 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2956 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 2957 } 2958 2959 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2960 2961 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2962 M = header[1]; N = header[2]; 2963 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2964 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2965 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2966 2967 /* If global sizes are set, check if they are consistent with that given in the file */ 2968 if (sizesset) { 2969 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2970 } 2971 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows); 2972 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2973 2974 /* determine ownership of all rows */ 2975 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2976 else m = newMat->rmap->n; /* Set by user */ 2977 2978 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2979 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2980 2981 /* First process needs enough room for process with most rows */ 2982 if (!rank) { 2983 mmax = rowners[1]; 2984 for (i=2; i<=size; i++) { 2985 mmax = PetscMax(mmax,rowners[i]); 2986 } 2987 } else mmax = m; 2988 2989 rowners[0] = 0; 2990 for (i=2; i<=size; i++) { 2991 rowners[i] += rowners[i-1]; 2992 } 2993 rstart = rowners[rank]; 2994 rend = rowners[rank+1]; 2995 2996 /* distribute row lengths to all processors */ 2997 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2998 if (!rank) { 2999 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3000 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3001 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3002 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3003 for (j=0;
j<m; j++) { 3004 procsnz[0] += ourlens[j]; 3005 } 3006 for (i=1; i<size; i++) { 3007 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3008 /* calculate the number of nonzeros on each processor */ 3009 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3010 procsnz[i] += rowlengths[j]; 3011 } 3012 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3013 } 3014 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3015 } else { 3016 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3017 } 3018 3019 if (!rank) { 3020 /* determine max buffer needed and allocate it */ 3021 maxnz = 0; 3022 for (i=0; i<size; i++) { 3023 maxnz = PetscMax(maxnz,procsnz[i]); 3024 } 3025 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3026 3027 /* read in my part of the matrix column indices */ 3028 nz = procsnz[0]; 3029 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3030 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3031 3032 /* read in every one elses and ship off */ 3033 for (i=1; i<size; i++) { 3034 nz = procsnz[i]; 3035 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3036 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3037 } 3038 ierr = PetscFree(cols);CHKERRQ(ierr); 3039 } else { 3040 /* determine buffer space needed for message */ 3041 nz = 0; 3042 for (i=0; i<m; i++) { 3043 nz += ourlens[i]; 3044 } 3045 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3046 3047 /* receive message of column indices*/ 3048 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3049 } 3050 3051 /* determine column ownership if matrix is not square */ 3052 if (N != M) { 3053 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3054 else n = newMat->cmap->n; 3055 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3056 cstart = cend - n; 3057 } else { 3058 cstart = rstart; 3059 cend = rend; 3060 n = cend - cstart; 3061 } 3062 3063 /* loop over local rows, determining number of off diagonal entries */ 3064 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3065 jj = 0; 3066 for (i=0; i<m; i++) { 3067 for (j=0; j<ourlens[i]; j++) { 3068 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3069 jj++; 3070 } 3071 } 3072 3073 for (i=0; i<m; i++) { 3074 ourlens[i] -= offlens[i]; 3075 } 3076 if (!sizesset) { 3077 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3078 } 3079 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3080 3081 for (i=0; i<m; i++) { 3082 ourlens[i] += offlens[i]; 3083 } 3084 3085 if (!rank) { 3086 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3087 3088 /* read in my part of the matrix numerical values */ 3089 nz = procsnz[0]; 3090 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3091 3092 /* insert into matrix */ 3093 jj = rstart; 3094 smycols = mycols; 3095 svals = vals; 3096 for (i=0; i<m; i++) { 3097 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3098 smycols += ourlens[i]; 3099 svals += ourlens[i]; 3100 jj++; 3101 } 3102 3103 /* read in other processors and ship out */ 3104 for (i=1; i<size; i++) { 3105 nz = procsnz[i]; 3106 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3107 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3108 } 3109 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3110 } else { 3111 /* receive numeric values */ 3112 ierr 
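/*
   Typical usage sketch for the loader above (assuming the hypothetical file
   "matrix.dat" was previously written with MatView() on a binary viewer, and
   assuming the MatLoad() interface that matches MatLoad_MPIAIJ()'s signature).
   Leaving the sizes of A unset lets the routine take the dimensions from the
   file header (the sizesset == 0 path above).

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/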
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
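/*
   Usage sketch for the routine above (assuming the MatGetSubMatrix()
   interface that dispatches to MatGetSubMatrix_MPIAIJ()): extract a parallel
   submatrix once with MAT_INITIAL_MATRIX, then refresh its values with
   MAT_REUSE_MATRIX, which relies on the "ISAllGather" object composed on the
   submatrix. The index sets isrow and iscol are assumed to hold this
   process's share of the desired global rows and columns.

      Mat sub;
      ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      ...  change numerical values of mat, keeping the nonzero pattern  ...
      ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
      ierr = MatDestroy(sub);CHKERRQ(ierr);
*/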
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each
  process, and then the final result obtained by concatenating those local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}  [size = nrow+1 = 2+1]
        j = {0,0,2}  [size = nz = 3]
        v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}    [size = nrow+1 = 1+1]
        j = {0,1,2}  [size = nz = 3]
        v = {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
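/*
   Usage sketch for the manual page above, with the documented 3x3 matrix
   split over two processes (process 0 owns rows 0-1, process 1 owns row 2);
   B, comm, and rank are hypothetical names:

      PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   PetscScalar v0[] = {1,2,3};
      PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   PetscScalar v1[] = {4,5,6};
      Mat         B;
      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);CHKERRQ(ierr);

   As implemented in MatMPIAIJSetPreallocationCSR_MPIAIJ() above, this also
   inserts the values and assembles B, so no further assembly calls are needed.
*/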
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
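/*
   Usage sketch for the d_nnz/o_nnz variant described in the manual page
   above, using the 8x8 example values; B is assumed already created with
   MatCreate()/MatSetSizes()/MatSetType() and rank is this process's rank:

      PetscInt d0[] = {2,2,2}, o0[] = {2,2,2};      process 0, rows 0-2
      PetscInt d1[] = {3,3,2}, o1[] = {2,1,1};      process 1, rows 3-5
      PetscInt d2[] = {1,1},   o2[] = {4,4};        process 2, rows 6-7
      const PetscInt *d = rank ? (rank == 1 ? d1 : d2) : d0;
      const PetscInt *o = rank ? (rank == 1 ? o1 : o2) : o0;
      ierr = MatMPIAIJSetPreallocation(B,0,d,0,o);CHKERRQ(ierr);
*/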
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}  [size = nrow+1 = 2+1]
        j = {0,0,2}  [size = nz = 3]
        v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}    [size = nrow+1 = 1+1]
        j = {0,1,2}  [size = nz = 3]
        v = {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
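/*
   Usage sketch with the two-process data from the manual page above (B and
   comm are hypothetical); this is simply a shorthand for the MatCreate(),
   MatSetSizes(), MatSetType(), MatMPIAIJSetPreallocationCSR() sequence in
   the function body above:

      Mat B;
      if (!rank) {
        PetscInt i[] = {0,1,3}, j[] = {0,0,2};   PetscScalar a[] = {1,2,3};
        ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&B);CHKERRQ(ierr);
      } else {
        PetscInt i[] = {0,3},   j[] = {0,1,2};   PetscScalar a[] = {4,5,6};
        ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i,j,a,&B);CHKERRQ(ierr);
      }
*/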
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
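/*
   Sketch of the creation paradigm recommended in the manual page above,
   which, unlike MatCreateMPIAIJ(), yields an MPIAIJ matrix even on a single
   process (the sizes and the nnz arrays below are placeholders):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/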
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.

@*/
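/*
   Usage sketch for MatMerge() below: each process builds a SeqAIJ matrix
   holding its slice of rows (all with the same global number of columns N)
   and the slices are stacked into one parallel matrix. Note that, as
   implemented below, MatMerge() destroys the input matrix. mlocal, N, and
   nnz are hypothetical names.

      Mat seq,par;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,nnz,&seq);CHKERRQ(ierr);
      ...  fill and assemble seq  ...
      ierr = MatMerge(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
*/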
3947 3948 @*/ 3949 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3950 { 3951 PetscErrorCode ierr; 3952 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3953 PetscInt *indx; 3954 PetscScalar *values; 3955 3956 PetscFunctionBegin; 3957 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3958 if (scall == MAT_INITIAL_MATRIX){ 3959 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3960 if (n == PETSC_DECIDE){ 3961 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3962 } 3963 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3964 rstart -= m; 3965 3966 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3967 for (i=0;i<m;i++) { 3968 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3969 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3970 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3971 } 3972 /* This routine will ONLY return MPIAIJ type matrix */ 3973 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3974 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3975 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3976 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3977 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3978 3979 } else if (scall == MAT_REUSE_MATRIX){ 3980 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3981 } else { 3982 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3983 } 3984 3985 for (i=0;i<m;i++) { 3986 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3987 Ii = i + rstart; 3988 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3989 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3990 } 3991 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3992 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3993 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3994 3995 PetscFunctionReturn(0); 3996 } 3997 3998 #undef __FUNCT__ 3999 #define __FUNCT__ "MatFileSplit" 4000 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4001 { 4002 PetscErrorCode ierr; 4003 PetscMPIInt rank; 4004 PetscInt m,N,i,rstart,nnz; 4005 size_t len; 4006 const PetscInt *indx; 4007 PetscViewer out; 4008 char *name; 4009 Mat B; 4010 const PetscScalar *values; 4011 4012 PetscFunctionBegin; 4013 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4014 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4015 /* Should this be the type of the diagonal block of A? 
*/ 4016 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4017 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4018 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4019 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4020 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4021 for (i=0;i<m;i++) { 4022 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4023 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4024 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4025 } 4026 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4027 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4028 4029 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4030 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4031 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4032 sprintf(name,"%s.%d",outfile,rank); 4033 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4034 ierr = PetscFree(name); 4035 ierr = MatView(B,out);CHKERRQ(ierr); 4036 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4037 ierr = MatDestroy(B);CHKERRQ(ierr); 4038 PetscFunctionReturn(0); 4039 } 4040 4041 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4042 #undef __FUNCT__ 4043 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4044 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4045 { 4046 PetscErrorCode ierr; 4047 Mat_Merge_SeqsToMPI *merge; 4048 PetscContainer container; 4049 4050 PetscFunctionBegin; 4051 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4052 if (container) { 4053 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4054 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4055 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4056 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4057 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4058 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4059 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4060 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4061 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4062 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4063 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4064 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4065 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4066 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4067 4068 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4069 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4070 } 4071 ierr = PetscFree(merge);CHKERRQ(ierr); 4072 4073 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4074 PetscFunctionReturn(0); 4075 } 4076 4077 #include "../src/mat/utils/freespace.h" 4078 #include "petscbt.h" 4079 4080 #undef __FUNCT__ 4081 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4082 /*@C 4083 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4084 matrices from each processor 4085 4086 Collective on MPI_Comm 4087 4088 Input Parameters: 4089 + comm - the communicators the parallel matrix will live on 4090 . seqmat - the input sequential matrices 4091 . m - number of local rows (or PETSC_DECIDE) 4092 . n - number of local columns (or PETSC_DECIDE) 4093 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4094 4095 Output Parameter: 4096 . mpimat - the parallel matrix generated 4097 4098 Level: advanced 4099 4100 Notes: 4101 The dimensions of the sequential matrix in each processor MUST be the same. 
4102 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4103 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4104 @*/ 4105 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4106 { 4107 PetscErrorCode ierr; 4108 MPI_Comm comm=((PetscObject)mpimat)->comm; 4109 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4110 PetscMPIInt size,rank,taga,*len_s; 4111 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4112 PetscInt proc,m; 4113 PetscInt **buf_ri,**buf_rj; 4114 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4115 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4116 MPI_Request *s_waits,*r_waits; 4117 MPI_Status *status; 4118 MatScalar *aa=a->a; 4119 MatScalar **abuf_r,*ba_i; 4120 Mat_Merge_SeqsToMPI *merge; 4121 PetscContainer container; 4122 4123 PetscFunctionBegin; 4124 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4125 4126 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4127 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4128 4129 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4130 if (container) { 4131 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4132 } 4133 bi = merge->bi; 4134 bj = merge->bj; 4135 buf_ri = merge->buf_ri; 4136 buf_rj = merge->buf_rj; 4137 4138 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4139 owners = merge->rowmap->range; 4140 len_s = merge->len_s; 4141 4142 /* send and recv matrix values */ 4143 /*-----------------------------*/ 4144 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4145 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4146 4147 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4148 for (proc=0,k=0; proc<size; proc++){ 4149 if (!len_s[proc]) continue; 4150 i = owners[proc]; 4151 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4152 k++; 4153 } 4154 4155 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4156 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4157 ierr = PetscFree(status);CHKERRQ(ierr); 4158 4159 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4160 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4161 4162 /* insert mat values of mpimat */ 4163 /*----------------------------*/ 4164 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4165 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4166 4167 for (k=0; k<merge->nrecv; k++){ 4168 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4169 nrows = *(buf_ri_k[k]); 4170 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4171 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4172 } 4173 4174 /* set values of ba */ 4175 m = merge->rowmap->n; 4176 for (i=0; i<m; i++) { 4177 arow = owners[rank] + i; 4178 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4179 bnzi = bi[i+1] - bi[i]; 4180 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4181 4182 /* add local non-zero vals of this proc's seqmat into ba */ 4183 anzi = ai[arow+1] - ai[arow]; 4184 aj = a->j + ai[arow]; 4185 aa = a->a + ai[arow]; 4186 nextaj = 0; 4187 for (j=0; nextaj<anzi; j++){ 4188 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4189 ba_i[j] += aa[nextaj++]; 4190 } 4191 } 4192 4193 /* add received vals into ba */ 4194 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4195 /* i-th row */ 4196 if (i == *nextrow[k]) { 4197 anzi = *(nextai[k]+1) - *nextai[k]; 4198 aj = buf_rj[k] + *(nextai[k]); 4199 aa = abuf_r[k] + *(nextai[k]); 4200 nextaj = 0; 4201 for (j=0; nextaj<anzi; j++){ 4202 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4203 ba_i[j] += aa[nextaj++]; 4204 } 4205 } 4206 nextrow[k]++; nextai[k]++; 4207 } 4208 } 4209 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4210 } 4211 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4212 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4213 4214 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4215 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4216 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4217 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4218 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4219 PetscFunctionReturn(0); 4220 } 4221 4222 #undef __FUNCT__ 4223 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4224 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4225 { 4226 PetscErrorCode ierr; 4227 Mat B_mpi; 4228 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4229 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4230 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4231 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4232 PetscInt len,proc,*dnz,*onz; 4233 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4234 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4235 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4236 MPI_Status *status; 4237 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4238 PetscBT lnkbt; 4239 Mat_Merge_SeqsToMPI *merge; 4240 PetscContainer container; 4241 4242 PetscFunctionBegin; 4243 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4244 4245 /* make sure it is a PETSc comm */ 4246 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4247 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4249 4250 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4251 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4252 4253 /* determine row ownership */ 4254 /*---------------------------------------------------------*/ 4255 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4256 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4257 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4258 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4259 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4260 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4261 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4262 4263 m = merge->rowmap->n; 4264 M = merge->rowmap->N; 4265 owners = merge->rowmap->range; 4266 4267 /* determine the number of messages to send, their lengths */ 4268 /*---------------------------------------------------------*/ 4269 len_s = merge->len_s; 4270 4271 len = 0; /* length of buf_si[] */ 4272 merge->nsend = 0; 4273 for (proc=0; proc<size; proc++){ 4274 len_si[proc] = 0; 4275 if (proc == rank){ 4276 len_s[proc] = 0; 4277 } else { 4278 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4279 
      len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (local to the receiving process)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];               /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;         /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
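/*
   A minimal usage sketch for MatMerge_SeqsToMPI() (hedged: inferred from the
   symbolic phase above, which walks the input SeqAIJ with global row indices,
   so each process is assumed to supply a full-sized SeqAIJ contribution; the
   name seqpart and the sizes are illustrative only):

       Mat seqpart;   - SeqAIJ of global size M x N holding this process's contribution
       Mat C;
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqpart,mlocal,PETSC_DECIDE,
                                 MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
       - a later call with MAT_REUSE_MATRIX skips the symbolic phase and reuses
         the "MatMergeSeqsToMPI" container attached to C
*/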
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns before the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns after the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
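/*
   Usage sketch for MatGetLocalMat() (hedged: A stands for any assembled MPIAIJ
   matrix; error checking abbreviated):

       Mat A_loc;
       ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
       - A_loc now holds this process's rows with global column numbering
       ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
       - the second call refreshes the numerical values only
*/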
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A and only its nonzero columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* off-diagonal columns that come before the diagonal block */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;    /* diagonal-block columns */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];  /* remaining off-diagonal columns */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
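/*
   Usage sketch for MatGetLocalMatCondensed() (hedged: PETSC_NULL row/col is the
   common case, meaning "all local rows" and "all nonzero columns"):

       Mat A_loc;
       ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
       - A_loc has one column per nonzero column of the local part of A
*/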
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of the rows and columns of B that were extracted
.    brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;    /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];  /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
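/*
   Usage sketch for MatGetBrowsOfAcols() (hedged: this mirrors how the parallel
   matrix-product kernels use it; isrowb/iscolb are returned so they can be
   passed back for MAT_REUSE_MATRIX):

       IS       isrowb = PETSC_NULL,iscolb = PETSC_NULL;
       PetscInt brstart;
       Mat      B_seq;
       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&isrowb,&iscolb,&brstart,&B_seq);CHKERRQ(ierr);
       ... later, after B's values change but not its pattern ...
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&isrowb,&iscolb,&brstart,&B_seq);CHKERRQ(ierr);
*/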
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting points in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    startsj_r - starting points in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.    B_oth - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj   = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
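/*
   Usage sketch for MatGetBrowsOfAoCols() (hedged: the saved startsj/startsj_r/
   bufa buffers are opaque to the caller, who only keeps them alive between
   calls, as the matrix-product routines do):

       PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
       MatScalar *bufa = PETSC_NULL;
       Mat       B_oth;
       ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       ... later, after B's values change but not its nonzero pattern ...
       ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/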
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

               n          p           p
        (          )  (       )  (        )
      m (    A     )*n(   B   ) = m(  C   )
        (          )  (       )  (        )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
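/*
   Usage sketch (hedged: names are illustrative; these routines are normally
   reached through the generic interface, which dispatches here via the
   composed "MatMatMult_mpidense_mpiaij_C" function):

       Mat A;   - MPIDENSE, m x n
       Mat B;   - MPIAIJ,   n x p
       Mat C;
       ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       - C is MPIDENSE, m x p; computed internally as (B'*A')' as noted above
*/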
EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/
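/*
   Usage sketch for the options-database key above (hedged: the standard PETSc
   creation sequence, nothing specific to this file; M and N are illustrative):

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);   - honors -mat_type mpiaij
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/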
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
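/*
   Usage sketch for MatCreateMPIAIJWithSplitArrays() (hedged: each process
   passes its own CSR arrays, which must outlive the matrix since they are not
   copied; see MatCreateMPIAIJ() for what goes into the two blocks):

       PetscInt    *di,*dj,*oi,*oj;   - CSR of the diagonal and off-diagonal blocks
       PetscScalar *da,*oa;           - the corresponding values
       ... fill the arrays; di[0] == 0 and oi[0] == 0 are required ...
       Mat A;
       ierr = MatCreateMPIAIJWithSplitArrays(comm,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,
                                             di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
*/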
/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          } else {
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END
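/*
   Usage sketch of the insertion path implemented above (hedged: an ordinary
   MatSetValues() call sequence; a row owned by another process is stashed and
   communicated during assembly, as in the else branch above):

       PetscInt    row,col;          - global indices; row may be off-process
       PetscScalar v = 1.0;
       ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/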