#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia = a->i;
  ib = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

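/*
   The MATAIJ and MATAIJCRL man pages above recommend calling both the sequential and the
   parallel preallocation routine so the same code works on any communicator size.  The block
   below is a minimal usage sketch of that pattern (illustration only, compiled out via #if 0;
   the helper name ExampleCreateAIJ and the fixed estimate of 5 diagonal / 2 off-diagonal
   nonzeros per row are hypothetical, not part of this file).
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt n,Mat *newmat)
{
  Mat            mat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);               /* becomes MATSEQAIJ or MATMPIAIJ */
  /* call both preallocation routines; the one that does not match the actual type is a no-op */
  ierr = MatSeqAIJSetPreallocation(mat,5,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(mat,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  /* ... MatSetValues() calls go here ... */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
#endif
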
#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal entries */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal entries */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

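/*
   A minimal calling sketch for MatDistribute_MPIAIJ() above: rank 0's square MATSEQAIJ gmat
   of global size N supplies the entries and every rank receives its share in dmat.  The
   local row count m is chosen here with PetscSplitOwnership(); gmat, N and dmat are assumed
   to exist in the caller (illustration only, compiled out via #if 0; ExampleDistribute is a
   hypothetical name).
*/
#if 0
static PetscErrorCode ExampleDistribute(MPI_Comm comm,Mat gmat,PetscInt N,Mat *dmat)
{
  PetscInt       m = PETSC_DECIDE;   /* let PETSc pick each rank's share of the N rows */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSplitOwnership(comm,&m,&N);CHKERRQ(ierr);
  ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,dmat);CHKERRQ(ierr);
  /* later, the same layout can be reused to push new numerical values from rank 0 */
  ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,dmat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
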
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

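/*
   The two macros above implement the same idea: keep each row's column indices sorted,
   narrow the search window with a few bisection steps, then scan linearly and either update
   the existing entry or shift the tail of the row up by one slot to make room for the new
   nonzero.  The fragment below is a minimal stand-alone sketch of that search-and-shift
   insert, assuming cols/vals have room for one more entry (illustration only, compiled out
   via #if 0; the names SortedRowInsert, cols, vals and nrow are hypothetical).
*/
#if 0
static void SortedRowInsert(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,PetscInt col,PetscScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i;

  /* bisection until the window is small, then a linear scan (mirrors the macros above) */
  while (high - low > 5) {
    t = (low + high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {
    if (cols[i] > col) break;              /* insertion point found */
    if (cols[i] == col) {                  /* existing nonzero: update in place */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return;
    }
  }
  /* new nonzero: shift the rest of the row up one slot and insert at position i */
  for (t=*nrow-1; t>=i; t--) {
    cols[t+1] = cols[t];
    vals[t+1] = vals[t];
  }
  cols[i] = col;
  vals[i] = value;
  (*nrow)++;
}
#endif
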
"MatSetValuesRow_MPIAIJ" 443 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 444 { 445 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 446 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 447 PetscErrorCode ierr; 448 PetscInt l,*garray = mat->garray,diag; 449 450 PetscFunctionBegin; 451 /* code only works for square matrices A */ 452 453 /* find size of row to the left of the diagonal part */ 454 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 455 row = row - diag; 456 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 457 if (garray[b->j[b->i[row]+l]] > diag) break; 458 } 459 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 460 461 /* diagonal part */ 462 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 463 464 /* right of diagonal part */ 465 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatSetValues_MPIAIJ" 471 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 472 { 473 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 474 PetscScalar value; 475 PetscErrorCode ierr; 476 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 477 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 478 PetscBool roworiented = aij->roworiented; 479 480 /* Some Variables required in the macro */ 481 Mat A = aij->A; 482 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 483 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 484 MatScalar *aa = a->a; 485 PetscBool ignorezeroentries = a->ignorezeroentries; 486 Mat B = aij->B; 487 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 488 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 489 MatScalar *ba = b->a; 490 491 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 492 PetscInt nonew; 493 MatScalar *ap1,*ap2; 494 495 PetscFunctionBegin; 496 if (v) PetscValidScalarPointer(v,6); 497 for (i=0; i<m; i++) { 498 if (im[i] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 501 #endif 502 if (im[i] >= rstart && im[i] < rend) { 503 row = im[i] - rstart; 504 lastcol1 = -1; 505 rp1 = aj + ai[row]; 506 ap1 = aa + ai[row]; 507 rmax1 = aimax[row]; 508 nrow1 = ailen[row]; 509 low1 = 0; 510 high1 = nrow1; 511 lastcol2 = -1; 512 rp2 = bj + bi[row]; 513 ap2 = ba + bi[row]; 514 rmax2 = bimax[row]; 515 nrow2 = bilen[row]; 516 low2 = 0; 517 high2 = nrow2; 518 519 for (j=0; j<n; j++) { 520 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 521 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 522 if (in[j] >= cstart && in[j] < cend){ 523 col = in[j] - cstart; 524 nonew = a->nonew; 525 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 526 } else if (in[j] < 0) continue; 527 #if defined(PETSC_USE_DEBUG) 528 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 529 #endif 530 else { 531 if (mat->was_assembled) { 532 if (!aij->colmap) { 533 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 534 } 535 #if defined (PETSC_USE_CTABLE) 536 
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) {starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) {starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) {starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) {starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

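/*
   The multiply routines above all follow the same overlap pattern: start the ghost-value
   scatter, do the work that needs only locally owned data while the messages are in flight,
   finish the scatter, and only then do the part that needs the ghost values.  A generic
   sketch of the pattern, assuming the scatter ctx and the vectors already exist (illustration
   only, compiled out via #if 0; ExampleOverlap and the VecScale() stand-in for local work are
   hypothetical):
*/
#if 0
static PetscErrorCode ExampleOverlap(VecScatter ctx,Vec x,Vec xghost,Vec ylocal)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(ctx,x,xghost,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* start communication  */
  ierr = VecScale(ylocal,2.0);CHKERRQ(ierr);                                        /* any local-only work  */
  ierr = VecScatterEnd(ctx,x,xghost,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);   /* finish communication */
  /* ... xghost may now be used, e.g. by the off-diagonal multiply ... */
  PetscFunctionReturn(0);
}
#endif
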
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

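/*
   MatIsTranspose_MPIAIJ() below first compares the diagonal blocks locally (cheap) and only
   then gathers the off-diagonal blocks with MatGetSubMatrices() for the expensive check.  A
   caller reaches it through the public MatIsTranspose() interface (it is composed as
   "MatIsTranspose_C" in MatDestroy_MPIAIJ() below).  Minimal usage sketch, assuming A is an
   assembled MPIAIJ matrix (illustration only, compiled out via #if 0; ExampleCheckTranspose
   is a hypothetical name):
*/
#if 0
static PetscErrorCode ExampleCheckTranspose(Mat A,PetscBool *flg)
{
  Mat            At;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatIsTranspose(A,At,1.e-10,flg);CHKERRQ(ierr);   /* *flg should come back PETSC_TRUE */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
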
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1489 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1490 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1491 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1492 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1493 PetscFunctionReturn(0); 1494 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1495 PetscInt inodecount,inodelimit,*inodes; 1496 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1497 if (inodes) { 1498 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1499 } else { 1500 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1504 PetscFunctionReturn(0); 1505 } 1506 } else if (isbinary) { 1507 if (size == 1) { 1508 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1509 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1510 } else { 1511 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1512 } 1513 PetscFunctionReturn(0); 1514 } else if (isdraw) { 1515 PetscDraw draw; 1516 PetscBool isnull; 1517 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1518 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1519 } 1520 1521 if (size == 1) { 1522 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1523 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1524 } else { 1525 /* assemble the entire matrix onto first processor. 
*/ 1526 Mat A; 1527 Mat_SeqAIJ *Aloc; 1528 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1529 MatScalar *a; 1530 1531 if (mat->rmap->N > 1024) { 1532 PetscBool flg = PETSC_FALSE; 1533 1534 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1535 if (!flg) { 1536 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1537 } 1538 } 1539 1540 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1541 if (!rank) { 1542 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1543 } else { 1544 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1545 } 1546 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1547 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1548 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1549 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1550 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1551 1552 /* copy over the A part */ 1553 Aloc = (Mat_SeqAIJ*)aij->A->data; 1554 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1557 for (i=0; i<m; i++) { 1558 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1559 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1560 } 1561 aj = Aloc->j; 1562 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1563 1564 /* copy over the B part */ 1565 Aloc = (Mat_SeqAIJ*)aij->B->data; 1566 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1567 row = mat->rmap->rstart; 1568 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1569 ct = cols; 1570 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1571 for (i=0; i<m; i++) { 1572 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1573 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1574 } 1575 ierr = PetscFree(ct);CHKERRQ(ierr); 1576 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1577 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1578 /* 1579 Everyone has to call to draw the matrix since the graphics waits are 1580 synchronized across all processors that share the PetscDraw object 1581 */ 1582 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1583 if (!rank) { 1584 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1585 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1586 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1587 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1588 } 1589 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1590 ierr = MatDestroy(&A);CHKERRQ(ierr); 1591 } 1592 PetscFunctionReturn(0); 1593 } 1594 1595 #undef __FUNCT__ 1596 #define __FUNCT__ "MatView_MPIAIJ" 1597 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1598 { 1599 PetscErrorCode ierr; 1600 PetscBool iascii,isdraw,issocket,isbinary; 1601 1602 PetscFunctionBegin; 1603 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1604 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1605 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1606 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1607 if (iascii || isdraw || isbinary || issocket) { 1608 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1609 } else { 1610 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1611 } 1612 PetscFunctionReturn(0); 1613 } 1614 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatSOR_MPIAIJ" 1617 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1618 { 1619 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1620 PetscErrorCode ierr; 1621 Vec bb1 = 0; 1622 PetscBool hasop; 1623 1624 PetscFunctionBegin; 1625 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1626 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1627 } 1628 1629 if (flag == SOR_APPLY_UPPER) { 1630 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1631 PetscFunctionReturn(0); 1632 } 1633 1634 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 1640 while (its--) { 1641 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 1644 /* update rhs: bb1 = bb - B*x */ 1645 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1646 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1647 1648 /* local sweep */ 1649 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1650 } 1651 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1652 if (flag & SOR_ZERO_INITIAL_GUESS) { 1653 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1654 its--; 1655 } 1656 while (its--) { 1657 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1659 1660 /* update rhs: bb1 = bb - B*x */ 1661 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1662 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1663 1664 /* local sweep */ 1665 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1666 } 1667 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1668 if (flag & SOR_ZERO_INITIAL_GUESS) { 1669 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1670 its--; 1671 } 1672 while (its--) { 1673 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1675 1676 /* update rhs: bb1 = bb - B*x */ 1677 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1678 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1679 1680 /* local sweep */ 1681 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1682 } 1683 } else if (flag & SOR_EISENSTAT) { 1684 Vec xx1; 1685 1686 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1687 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1688 1689 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1690 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1691 if (!mat->diag) { 1692 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1693 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1694 } 1695 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1696 if (hasop) { 1697 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1698 } else { 1699 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1700 } 1701 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1702 1703 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1704 1705 /* local sweep */ 1706 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1707 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1708 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1709 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1710 1711 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatPermute_MPIAIJ" 1717 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1718 { 1719 MPI_Comm comm; 1720 PetscInt first,local_rowsize,local_colsize; 1721 const PetscInt *rows; 1722 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1723 PetscErrorCode ierr; 1724 1725 PetscFunctionBegin; 1726 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1727 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1728 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1729 /* collect the global row permutation and invert it */ 1730 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1731 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1732 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1733 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1734 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1735 /* get the local target indices */ 1736 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1737 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1738 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1739 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1740 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1741 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1742 /* the column permutation is so much easier; 1743 make a local version of 'colp' and invert it */ 1744 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1745 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1746 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1747 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1748 /* now we just get the submatrix */ 1749 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1750 /* clean up */ 1751 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1752 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1753 PetscFunctionReturn(0); 1754 } 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1758 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1759 { 1760 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1761 Mat A = mat->A,B = 
mat->B; 1762 PetscErrorCode ierr; 1763 PetscReal isend[5],irecv[5]; 1764 1765 PetscFunctionBegin; 1766 info->block_size = 1.0; 1767 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1768 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1769 isend[3] = info->memory; isend[4] = info->mallocs; 1770 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1771 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1772 isend[3] += info->memory; isend[4] += info->mallocs; 1773 if (flag == MAT_LOCAL) { 1774 info->nz_used = isend[0]; 1775 info->nz_allocated = isend[1]; 1776 info->nz_unneeded = isend[2]; 1777 info->memory = isend[3]; 1778 info->mallocs = isend[4]; 1779 } else if (flag == MAT_GLOBAL_MAX) { 1780 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1781 info->nz_used = irecv[0]; 1782 info->nz_allocated = irecv[1]; 1783 info->nz_unneeded = irecv[2]; 1784 info->memory = irecv[3]; 1785 info->mallocs = irecv[4]; 1786 } else if (flag == MAT_GLOBAL_SUM) { 1787 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1788 info->nz_used = irecv[0]; 1789 info->nz_allocated = irecv[1]; 1790 info->nz_unneeded = irecv[2]; 1791 info->memory = irecv[3]; 1792 info->mallocs = irecv[4]; 1793 } 1794 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1795 info->fill_ratio_needed = 0; 1796 info->factor_mallocs = 0; 1797 PetscFunctionReturn(0); 1798 } 1799 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "MatSetOption_MPIAIJ" 1802 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1803 { 1804 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1805 PetscErrorCode ierr; 1806 1807 PetscFunctionBegin; 1808 switch (op) { 1809 case MAT_NEW_NONZERO_LOCATIONS: 1810 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1811 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1812 case MAT_KEEP_NONZERO_PATTERN: 1813 case MAT_NEW_NONZERO_LOCATION_ERR: 1814 case MAT_USE_INODES: 1815 case MAT_IGNORE_ZERO_ENTRIES: 1816 MatCheckPreallocated(A,1); 1817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1818 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_ROW_ORIENTED: 1821 a->roworiented = flg; 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1824 break; 1825 case MAT_NEW_DIAGONALS: 1826 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1827 break; 1828 case MAT_IGNORE_OFF_PROC_ENTRIES: 1829 a->donotstash = flg; 1830 break; 1831 case MAT_SPD: 1832 A->spd_set = PETSC_TRUE; 1833 A->spd = flg; 1834 if (flg) { 1835 A->symmetric = PETSC_TRUE; 1836 A->structurally_symmetric = PETSC_TRUE; 1837 A->symmetric_set = PETSC_TRUE; 1838 A->structurally_symmetric_set = PETSC_TRUE; 1839 } 1840 break; 1841 case MAT_SYMMETRIC: 1842 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1843 break; 1844 case MAT_STRUCTURALLY_SYMMETRIC: 1845 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1846 break; 1847 case MAT_HERMITIAN: 1848 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1849 break; 1850 case MAT_SYMMETRY_ETERNAL: 1851 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1852 break; 1853 default: 1854 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatGetRow_MPIAIJ" 1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1862 { 1863 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1864 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1865 PetscErrorCode ierr; 1866 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1867 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1868 PetscInt *cmap,*idx_p; 1869 1870 PetscFunctionBegin; 1871 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1872 mat->getrowactive = PETSC_TRUE; 1873 1874 if (!mat->rowvalues && (idx || v)) { 1875 /* 1876 allocate enough space to hold information from the longest row. 1877 */ 1878 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1879 PetscInt max = 1,tmp; 1880 for (i=0; i<matin->rmap->n; i++) { 1881 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1882 if (max < tmp) { max = tmp; } 1883 } 1884 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1885 } 1886 1887 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1888 lrow = row - rstart; 1889 1890 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1891 if (!v) {pvA = 0; pvB = 0;} 1892 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1893 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1894 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1895 nztot = nzA + nzB; 1896 1897 cmap = mat->garray; 1898 if (v || idx) { 1899 if (nztot) { 1900 /* Sort by increasing column numbers, assuming A and B already sorted */ 1901 PetscInt imark = -1; 1902 if (v) { 1903 *v = v_p = mat->rowvalues; 1904 for (i=0; i<nzB; i++) { 1905 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1906 else break; 1907 } 1908 imark = i; 1909 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1910 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1911 } 1912 if (idx) { 1913 *idx = idx_p = mat->rowindices; 1914 if (imark > -1) { 1915 for (i=0; i<imark; i++) { 1916 idx_p[i] = cmap[cworkB[i]]; 1917 } 1918 } else { 1919 for (i=0; i<nzB; i++) { 1920 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1921 else break; 1922 } 1923 imark = i; 1924 } 1925 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1926 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1927 } 1928 } else { 1929 if (idx) *idx = 0; 1930 if (v) *v = 0; 1931 } 1932 } 1933 *nz = nztot; 1934 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1935 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1941 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1942 { 1943 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1944 1945 PetscFunctionBegin; 1946 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1947 aij->getrowactive = PETSC_FALSE; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatNorm_MPIAIJ" 1953 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1954 { 1955 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1956 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1957 PetscErrorCode ierr; 1958 PetscInt i,j,cstart = mat->cmap->rstart; 1959 PetscReal sum = 0.0; 1960 MatScalar *v; 1961 1962 PetscFunctionBegin; 1963 if (aij->size == 1) { 1964 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1965 } 
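  /*
     Orientation for the parallel branch below: each process accumulates contributions from its
     diagonal block (aij->A) and off-diagonal block (aij->B), then combines the partial results
     over the matrix's communicator:
       - NORM_FROBENIUS: local sums of |a_ij|^2 are reduced with MPIU_SUM and the square root of
         the global sum is taken, i.e. ||mat||_F = sqrt( sum_ij |a_ij|^2 );
       - NORM_1: per-column absolute sums are reduced with MPIU_SUM and the largest column sum is kept;
       - NORM_INFINITY: the largest local row sum is reduced with MPIU_MAX.
     A minimal caller-side sketch (variable names illustrative only):
       PetscReal nrm;
       ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);   (collective on the matrix's communicator)
  */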
else { 1966 if (type == NORM_FROBENIUS) { 1967 v = amat->a; 1968 for (i=0; i<amat->nz; i++) { 1969 #if defined(PETSC_USE_COMPLEX) 1970 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1971 #else 1972 sum += (*v)*(*v); v++; 1973 #endif 1974 } 1975 v = bmat->a; 1976 for (i=0; i<bmat->nz; i++) { 1977 #if defined(PETSC_USE_COMPLEX) 1978 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1979 #else 1980 sum += (*v)*(*v); v++; 1981 #endif 1982 } 1983 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1984 *norm = PetscSqrtReal(*norm); 1985 } else if (type == NORM_1) { /* max column norm */ 1986 PetscReal *tmp,*tmp2; 1987 PetscInt *jj,*garray = aij->garray; 1988 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1989 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1990 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1991 *norm = 0.0; 1992 v = amat->a; jj = amat->j; 1993 for (j=0; j<amat->nz; j++) { 1994 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1995 } 1996 v = bmat->a; jj = bmat->j; 1997 for (j=0; j<bmat->nz; j++) { 1998 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1999 } 2000 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2001 for (j=0; j<mat->cmap->N; j++) { 2002 if (tmp2[j] > *norm) *norm = tmp2[j]; 2003 } 2004 ierr = PetscFree(tmp);CHKERRQ(ierr); 2005 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2006 } else if (type == NORM_INFINITY) { /* max row norm */ 2007 PetscReal ntemp = 0.0; 2008 for (j=0; j<aij->A->rmap->n; j++) { 2009 v = amat->a + amat->i[j]; 2010 sum = 0.0; 2011 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2012 sum += PetscAbsScalar(*v); v++; 2013 } 2014 v = bmat->a + bmat->i[j]; 2015 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2016 sum += PetscAbsScalar(*v); v++; 2017 } 2018 if (sum > ntemp) ntemp = sum; 2019 } 2020 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2021 } else { 2022 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2023 } 2024 } 2025 PetscFunctionReturn(0); 2026 } 2027 2028 #undef __FUNCT__ 2029 #define __FUNCT__ "MatTranspose_MPIAIJ" 2030 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2031 { 2032 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2033 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2034 PetscErrorCode ierr; 2035 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2036 PetscInt cstart=A->cmap->rstart,ncol; 2037 Mat B; 2038 MatScalar *array; 2039 2040 PetscFunctionBegin; 2041 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2042 2043 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2044 ai = Aloc->i; aj = Aloc->j; 2045 bi = Bloc->i; bj = Bloc->j; 2046 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2047 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2048 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2049 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2050 for (i=0; i<ai[ma]; i++){ 2051 d_nnz[aj[i]] ++; 2052 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2053 } 2054 2055 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2056 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2057 ierr = 
MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2058 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2059 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2060 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2061 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2062 } else { 2063 B = *matout; 2064 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2065 for (i=0; i<ai[ma]; i++){ 2066 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2067 } 2068 } 2069 2070 /* copy over the A part */ 2071 array = Aloc->a; 2072 row = A->rmap->rstart; 2073 for (i=0; i<ma; i++) { 2074 ncol = ai[i+1]-ai[i]; 2075 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2076 row++; array += ncol; aj += ncol; 2077 } 2078 aj = Aloc->j; 2079 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2080 2081 /* copy over the B part */ 2082 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2083 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2084 array = Bloc->a; 2085 row = A->rmap->rstart; 2086 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2087 cols_tmp = cols; 2088 for (i=0; i<mb; i++) { 2089 ncol = bi[i+1]-bi[i]; 2090 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2091 row++; array += ncol; cols_tmp += ncol; 2092 } 2093 ierr = PetscFree(cols);CHKERRQ(ierr); 2094 2095 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2096 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2097 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2098 *matout = B; 2099 } else { 2100 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2101 } 2102 PetscFunctionReturn(0); 2103 } 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2107 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2108 { 2109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2110 Mat a = aij->A,b = aij->B; 2111 PetscErrorCode ierr; 2112 PetscInt s1,s2,s3; 2113 2114 PetscFunctionBegin; 2115 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2116 if (rr) { 2117 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2118 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2119 /* Overlap communication with computation. 
*/ 2120 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2121 } 2122 if (ll) { 2123 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2124 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2125 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2126 } 2127 /* scale the diagonal block */ 2128 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2129 2130 if (rr) { 2131 /* Do a scatter end and then right scale the off-diagonal block */ 2132 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2133 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2134 } 2135 2136 PetscFunctionReturn(0); 2137 } 2138 2139 #undef __FUNCT__ 2140 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2141 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatEqual_MPIAIJ" 2153 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2154 { 2155 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2156 Mat a,b,c,d; 2157 PetscBool flg; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 a = matA->A; b = matA->B; 2162 c = matB->A; d = matB->B; 2163 2164 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2165 if (flg) { 2166 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2167 } 2168 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2169 PetscFunctionReturn(0); 2170 } 2171 2172 #undef __FUNCT__ 2173 #define __FUNCT__ "MatCopy_MPIAIJ" 2174 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2175 { 2176 PetscErrorCode ierr; 2177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2178 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2179 2180 PetscFunctionBegin; 2181 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2182 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2183 /* because of the column compression in the off-processor part of the matrix a->B, 2184 the number of columns in a->B and b->B may be different, hence we cannot call 2185 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2186 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2187 then copying the submatrices */ 2188 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2189 } else { 2190 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2191 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2192 } 2193 PetscFunctionReturn(0); 2194 } 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatSetUp_MPIAIJ" 2198 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2199 { 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 #undef __FUNCT__ 2208 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2209 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2210 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2211 { 2212 PetscInt i,m=Y->rmap->N; 2213 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2214 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2215 const PetscInt *xi = x->i,*yi = y->i; 2216 2217 PetscFunctionBegin; 2218 /* Set the number of nonzeros in the new matrix */ 2219 for(i=0; i<m; i++) { 2220 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2221 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2222 nnz[i] = 0; 2223 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2224 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2225 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2226 nnz[i]++; 2227 } 2228 for (; k<nzy; k++) nnz[i]++; 2229 } 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatAXPY_MPIAIJ" 2235 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 PetscInt i; 2239 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2240 PetscBLASInt bnz,one=1; 2241 Mat_SeqAIJ *x,*y; 2242 2243 PetscFunctionBegin; 2244 if (str == SAME_NONZERO_PATTERN) { 2245 PetscScalar alpha = a; 2246 x = (Mat_SeqAIJ *)xx->A->data; 2247 y = (Mat_SeqAIJ *)yy->A->data; 2248 bnz = PetscBLASIntCast(x->nz); 2249 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2250 x = (Mat_SeqAIJ *)xx->B->data; 2251 y = (Mat_SeqAIJ *)yy->B->data; 2252 bnz = PetscBLASIntCast(x->nz); 2253 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2254 } else if (str == SUBSET_NONZERO_PATTERN) { 2255 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2256 2257 x = (Mat_SeqAIJ *)xx->B->data; 2258 y = (Mat_SeqAIJ *)yy->B->data; 2259 if (y->xtoy && y->XtoY != xx->B) { 2260 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2261 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2262 } 2263 if (!y->xtoy) { /* get xtoy */ 2264 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2265 y->XtoY = xx->B; 2266 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2267 } 2268 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2269 } else { 2270 Mat B; 2271 PetscInt *nnz_d,*nnz_o; 2272 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2273 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2274 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2275 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2276 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2277 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2278     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2279     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2280     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2281     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2282     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2283     ierr = MatHeaderReplace(Y,B);
2284     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2285     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2286   }
2287   PetscFunctionReturn(0);
2288 }
2289
2290 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2291
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatConjugate_MPIAIJ"
2294 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2295 {
2296 #if defined(PETSC_USE_COMPLEX)
2297   PetscErrorCode ierr;
2298   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2299
2300   PetscFunctionBegin;
2301   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2302   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2303 #else
2304   PetscFunctionBegin;
2305 #endif
2306   PetscFunctionReturn(0);
2307 }
2308
2309 #undef __FUNCT__
2310 #define __FUNCT__ "MatRealPart_MPIAIJ"
2311 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2312 {
2313   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2314   PetscErrorCode ierr;
2315
2316   PetscFunctionBegin;
2317   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2318   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2324 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2325 {
2326   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2327   PetscErrorCode ierr;
2328
2329   PetscFunctionBegin;
2330   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2331   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2332   PetscFunctionReturn(0);
2333 }
2334
2335 #ifdef PETSC_HAVE_PBGL
2336
2337 #include <boost/parallel/mpi/bsp_process_group.hpp>
2338 #include <boost/graph/distributed/ilu_default_graph.hpp>
2339 #include <boost/graph/distributed/ilu_0_block.hpp>
2340 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2341 #include <boost/graph/distributed/petsc/interface.hpp>
2342 #include <boost/multi_array.hpp>
2343 #include <boost/parallel/distributed_property_map.hpp>
2344
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2347 /*
2348   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2349 */
2350 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2351 {
2352   namespace petsc = boost::distributed::petsc;
2353
2354   namespace graph_dist = boost::graph::distributed;
2355   using boost::graph::distributed::ilu_default::process_group_type;
2356   using boost::graph::ilu_permuted;
2357
2358   PetscBool      row_identity, col_identity;
2359   PetscContainer c;
2360   PetscInt       m, n, M, N;
2361   PetscErrorCode ierr;
2362
2363   PetscFunctionBegin;
2364   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2365   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2366   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2367   if (!row_identity || !col_identity) {
2368     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2369   }
2370
2371   process_group_type pg;
2372   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2373   lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg)); 2374 lgraph_type& level_graph = *lgraph_p; 2375 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2376 2377 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2378 ilu_permuted(level_graph); 2379 2380 /* put together the new matrix */ 2381 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2382 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2383 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2384 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2385 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2386 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2387 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2389 2390 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2391 ierr = PetscContainerSetPointer(c, lgraph_p); 2392 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2393 ierr = PetscContainerDestroy(&c); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2399 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2400 { 2401 PetscFunctionBegin; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatSolve_MPIAIJ" 2407 /* 2408 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2409 */ 2410 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2411 { 2412 namespace graph_dist = boost::graph::distributed; 2413 2414 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2415 lgraph_type* lgraph_p; 2416 PetscContainer c; 2417 PetscErrorCode ierr; 2418 2419 PetscFunctionBegin; 2420 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2421 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2422 ierr = VecCopy(b, x);CHKERRQ(ierr); 2423 2424 PetscScalar* array_x; 2425 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2426 PetscInt sx; 2427 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2428 2429 PetscScalar* array_b; 2430 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2431 PetscInt sb; 2432 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2433 2434 lgraph_type& level_graph = *lgraph_p; 2435 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2436 2437 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2438 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2439 ref_x(array_x, boost::extents[num_vertices(graph)]); 2440 2441 typedef boost::iterator_property_map<array_ref_type::iterator, 2442 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2443 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2444 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2445 2446 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2447 2448 PetscFunctionReturn(0); 2449 } 2450 #endif 2451 2452 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2453 PetscInt nzlocal,nsends,nrecvs; 2454 PetscMPIInt *send_rank,*recv_rank; 2455 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2456 PetscScalar *sbuf_a,**rbuf_a; 2457 PetscErrorCode (*Destroy)(Mat); 2458 } Mat_Redundant; 2459 2460 #undef __FUNCT__ 2461 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2462 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2463 { 2464 PetscErrorCode ierr; 2465 Mat_Redundant 
*redund=(Mat_Redundant*)ptr;
2466   PetscInt       i;
2467
2468   PetscFunctionBegin;
2469   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2470   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2471   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2472   for (i=0; i<redund->nrecvs; i++){
2473     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2474     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2475   }
2476   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2477   ierr = PetscFree(redund);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatDestroy_MatRedundant"
2483 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2484 {
2485   PetscErrorCode ierr;
2486   PetscContainer container;
2487   Mat_Redundant  *redund=PETSC_NULL;
2488
2489   PetscFunctionBegin;
2490   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2491   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2492   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2493   A->ops->destroy = redund->Destroy;
2494   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2495   if (A->ops->destroy) {
2496     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2497   }
2498   PetscFunctionReturn(0);
2499 }
2500
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2503 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2504 {
2505   PetscMPIInt    rank,size;
2506   MPI_Comm       comm=((PetscObject)mat)->comm;
2507   PetscErrorCode ierr;
2508   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2509   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2510   PetscInt       *rowrange=mat->rmap->range;
2511   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2512   Mat            A=aij->A,B=aij->B,C=*matredundant;
2513   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2514   PetscScalar    *sbuf_a;
2515   PetscInt       nzlocal=a->nz+b->nz;
2516   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2517   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2518   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2519   MatScalar      *aworkA,*aworkB;
2520   PetscScalar    *vals;
2521   PetscMPIInt    tag1,tag2,tag3,imdex;
2522   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2523                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2524   MPI_Status     recv_status,*send_status;
2525   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2526   PetscInt       **rbuf_j=PETSC_NULL;
2527   PetscScalar    **rbuf_a=PETSC_NULL;
2528   Mat_Redundant  *redund=PETSC_NULL;
2529   PetscContainer container;
2530
2531   PetscFunctionBegin;
2532   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2533   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2534
2535   if (reuse == MAT_REUSE_MATRIX) {
2536     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2537     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2538     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2539     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong local size"); 2540 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2541 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2542 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2543 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2544 2545 nsends = redund->nsends; 2546 nrecvs = redund->nrecvs; 2547 send_rank = redund->send_rank; 2548 recv_rank = redund->recv_rank; 2549 sbuf_nz = redund->sbuf_nz; 2550 rbuf_nz = redund->rbuf_nz; 2551 sbuf_j = redund->sbuf_j; 2552 sbuf_a = redund->sbuf_a; 2553 rbuf_j = redund->rbuf_j; 2554 rbuf_a = redund->rbuf_a; 2555 } 2556 2557 if (reuse == MAT_INITIAL_MATRIX){ 2558 PetscMPIInt subrank,subsize; 2559 PetscInt nleftover,np_subcomm; 2560 /* get the destination processors' id send_rank, nsends and nrecvs */ 2561 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2562 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2563 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2564 np_subcomm = size/nsubcomm; 2565 nleftover = size - nsubcomm*np_subcomm; 2566 nsends = 0; nrecvs = 0; 2567 for (i=0; i<size; i++){ /* i=rank*/ 2568 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2569 send_rank[nsends] = i; nsends++; 2570 recv_rank[nrecvs++] = i; 2571 } 2572 } 2573 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2574 i = size-nleftover-1; 2575 j = 0; 2576 while (j < nsubcomm - nleftover){ 2577 send_rank[nsends++] = i; 2578 i--; j++; 2579 } 2580 } 2581 2582 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2583 for (i=0; i<nleftover; i++){ 2584 recv_rank[nrecvs++] = size-nleftover+i; 2585 } 2586 } 2587 2588 /* allocate sbuf_j, sbuf_a */ 2589 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2590 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2591 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2592 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2593 2594 /* copy mat's local entries into the buffers */ 2595 if (reuse == MAT_INITIAL_MATRIX){ 2596 rownz_max = 0; 2597 rptr = sbuf_j; 2598 cols = sbuf_j + rend-rstart + 1; 2599 vals = sbuf_a; 2600 rptr[0] = 0; 2601 for (i=0; i<rend-rstart; i++){ 2602 row = i + rstart; 2603 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2604 ncols = nzA + nzB; 2605 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2606 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2607 /* load the column indices for this row into cols */ 2608 lwrite = 0; 2609 for (l=0; l<nzB; l++) { 2610 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2611 vals[lwrite] = aworkB[l]; 2612 cols[lwrite++] = ctmp; 2613 } 2614 } 2615 for (l=0; l<nzA; l++){ 2616 vals[lwrite] = aworkA[l]; 2617 cols[lwrite++] = cstart + cworkA[l]; 2618 } 2619 for (l=0; l<nzB; l++) { 2620 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2621 vals[lwrite] = aworkB[l]; 2622 cols[lwrite++] = ctmp; 2623 } 2624 } 2625 vals += ncols; 2626 cols += ncols; 2627 rptr[i+1] = rptr[i] + ncols; 2628 if (rownz_max < ncols) rownz_max = ncols; 2629 } 2630 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2631 } else { /* only copy matrix values into sbuf_a */ 2632 rptr = sbuf_j; 2633 vals = sbuf_a; 2634 rptr[0] = 0; 2635 for (i=0; i<rend-rstart; i++){ 2636 row = i + 
rstart; 2637 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2638 ncols = nzA + nzB; 2639 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2640 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2641 lwrite = 0; 2642 for (l=0; l<nzB; l++) { 2643 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2644 } 2645 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2646 for (l=0; l<nzB; l++) { 2647 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2648 } 2649 vals += ncols; 2650 rptr[i+1] = rptr[i] + ncols; 2651 } 2652 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2653 2654 /* send nzlocal to others, and recv other's nzlocal */ 2655 /*--------------------------------------------------*/ 2656 if (reuse == MAT_INITIAL_MATRIX){ 2657 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2658 s_waits2 = s_waits3 + nsends; 2659 s_waits1 = s_waits2 + nsends; 2660 r_waits1 = s_waits1 + nsends; 2661 r_waits2 = r_waits1 + nrecvs; 2662 r_waits3 = r_waits2 + nrecvs; 2663 } else { 2664 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2665 r_waits3 = s_waits3 + nsends; 2666 } 2667 2668 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2669 if (reuse == MAT_INITIAL_MATRIX){ 2670 /* get new tags to keep the communication clean */ 2671 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2672 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2673 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2674 2675 /* post receives of other's nzlocal */ 2676 for (i=0; i<nrecvs; i++){ 2677 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2678 } 2679 /* send nzlocal to others */ 2680 for (i=0; i<nsends; i++){ 2681 sbuf_nz[i] = nzlocal; 2682 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2683 } 2684 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2685 count = nrecvs; 2686 while (count) { 2687 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2688 recv_rank[imdex] = recv_status.MPI_SOURCE; 2689 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2690 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2691 2692 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2693 rbuf_nz[imdex] += i + 2; 2694 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2695 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2696 count--; 2697 } 2698 /* wait on sends of nzlocal */ 2699 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2700 /* send mat->i,j to others, and recv from other's */ 2701 /*------------------------------------------------*/ 2702 for (i=0; i<nsends; i++){ 2703 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2704 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2705 } 2706 /* wait on receives of mat->i,j */ 2707 /*------------------------------*/ 2708 count = nrecvs; 2709 while (count) { 2710 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2711 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2712 count--; 2713 } 2714 /* wait on sends of mat->i,j */ 2715 /*---------------------------*/ 2716 if (nsends) { 2717 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2718 } 2719 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2720 2721 /* post receives, send and receive mat->a */ 2722 /*----------------------------------------*/ 2723 for (imdex=0; imdex<nrecvs; imdex++) { 2724 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2725 } 2726 for (i=0; i<nsends; i++){ 2727 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2728 } 2729 count = nrecvs; 2730 while (count) { 2731 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2732 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2733 count--; 2734 } 2735 if (nsends) { 2736 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2737 } 2738 2739 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2740 2741 /* create redundant matrix */ 2742 /*-------------------------*/ 2743 if (reuse == MAT_INITIAL_MATRIX){ 2744 /* compute rownz_max for preallocation */ 2745 for (imdex=0; imdex<nrecvs; imdex++){ 2746 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2747 rptr = rbuf_j[imdex]; 2748 for (i=0; i<j; i++){ 2749 ncols = rptr[i+1] - rptr[i]; 2750 if (rownz_max < ncols) rownz_max = ncols; 2751 } 2752 } 2753 2754 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2755 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2756 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2757 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2758 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2759 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2760 } else { 2761 C = *matredundant; 2762 } 2763 2764 /* insert local matrix entries */ 2765 rptr = sbuf_j; 2766 cols = sbuf_j + rend-rstart + 1; 2767 vals = sbuf_a; 2768 for (i=0; i<rend-rstart; i++){ 2769 row = i + rstart; 2770 ncols = rptr[i+1] - rptr[i]; 2771 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2772 vals += ncols; 2773 cols += ncols; 2774 } 2775 /* insert received matrix entries */ 2776 for (imdex=0; imdex<nrecvs; imdex++){ 2777 rstart = rowrange[recv_rank[imdex]]; 2778 rend = rowrange[recv_rank[imdex]+1]; 2779 rptr = rbuf_j[imdex]; 2780 cols = rbuf_j[imdex] + rend-rstart + 1; 2781 vals = rbuf_a[imdex]; 2782 for (i=0; i<rend-rstart; i++){ 2783 row = i + rstart; 2784 ncols = rptr[i+1] - rptr[i]; 2785 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2786 vals += ncols; 2787 cols += ncols; 2788 } 2789 } 2790 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2791 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2792 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2793 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2794 if (reuse == MAT_INITIAL_MATRIX) { 2795 PetscContainer container; 2796 *matredundant = C; 2797 /* create a supporting struct and attach it to C for reuse */ 2798 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2799 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
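    /*
       The PetscContainer created above is the standard PETSc idiom for attaching private data to an
       object: the Mat_Redundant buffers filled in below are stashed on C so that a later call with
       MAT_REUSE_MATRIX can retrieve them instead of re-allocating buffers and re-negotiating the
       communication pattern.  A minimal sketch of the retrieval side, mirroring the reuse branch
       earlier in this routine (names illustrative):

           PetscContainer cont;
           Mat_Redundant  *red;
           ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&cont);CHKERRQ(ierr);
           ierr = PetscContainerGetPointer(cont,(void**)&red);CHKERRQ(ierr);
    */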
2800     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2801     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2802     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2803     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2804
2805     redund->nzlocal   = nzlocal;
2806     redund->nsends    = nsends;
2807     redund->nrecvs    = nrecvs;
2808     redund->send_rank = send_rank;
2809     redund->recv_rank = recv_rank;
2810     redund->sbuf_nz   = sbuf_nz;
2811     redund->rbuf_nz   = rbuf_nz;
2812     redund->sbuf_j    = sbuf_j;
2813     redund->sbuf_a    = sbuf_a;
2814     redund->rbuf_j    = rbuf_j;
2815     redund->rbuf_a    = rbuf_a;
2816
2817     redund->Destroy = C->ops->destroy;
2818     C->ops->destroy = MatDestroy_MatRedundant;
2819   }
2820   PetscFunctionReturn(0);
2821 }
2822
2823 #undef __FUNCT__
2824 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2825 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2826 {
2827   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2828   PetscErrorCode ierr;
2829   PetscInt       i,*idxb = 0;
2830   PetscScalar    *va,*vb;
2831   Vec            vtmp;
2832
2833   PetscFunctionBegin;
2834   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2835   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2836   if (idx) {
2837     for (i=0; i<A->rmap->n; i++) {
2838       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2839     }
2840   }
2841
2842   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2843   if (idx) {
2844     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2845   }
2846   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2847   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2848
2849   for (i=0; i<A->rmap->n; i++){
2850     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2851       va[i] = vb[i];
2852       if (idx) idx[i] = a->garray[idxb[i]];
2853     }
2854   }
2855
2856   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2857   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2858   ierr = PetscFree(idxb);CHKERRQ(ierr);
2859   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862
2863 #undef __FUNCT__
2864 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2865 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2866 {
2867   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2868   PetscErrorCode ierr;
2869   PetscInt       i,*idxb = 0;
2870   PetscScalar    *va,*vb;
2871   Vec            vtmp;
2872
2873   PetscFunctionBegin;
2874   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2875   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2876   if (idx) {
2877     for (i=0; i<A->rmap->n; i++) {
2878       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2879     }
2880   }
2881
2882   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2883   if (idx) {
2884     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2885   }
2886   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2887   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2888
2889   for (i=0; i<A->rmap->n; i++){
2890     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2891       va[i] = vb[i];
2892       if (idx) idx[i] = a->garray[idxb[i]];
2893     }
2894   }
2895
2896   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2897   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2898   ierr = PetscFree(idxb);CHKERRQ(ierr);
2899   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2900   PetscFunctionReturn(0);
2901 }
2902
2903 #undef __FUNCT__
2904 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2905 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2906 {
2907   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2908   PetscInt   n = A->rmap->n;
2909   PetscInt   cstart =
A->cmap->rstart; 2910 PetscInt *cmap = mat->garray; 2911 PetscInt *diagIdx, *offdiagIdx; 2912 Vec diagV, offdiagV; 2913 PetscScalar *a, *diagA, *offdiagA; 2914 PetscInt r; 2915 PetscErrorCode ierr; 2916 2917 PetscFunctionBegin; 2918 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2919 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2920 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2921 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2922 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2923 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2924 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2925 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2926 for(r = 0; r < n; ++r) { 2927 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2928 a[r] = diagA[r]; 2929 idx[r] = cstart + diagIdx[r]; 2930 } else { 2931 a[r] = offdiagA[r]; 2932 idx[r] = cmap[offdiagIdx[r]]; 2933 } 2934 } 2935 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2936 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2937 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2938 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2939 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2940 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2941 PetscFunctionReturn(0); 2942 } 2943 2944 #undef __FUNCT__ 2945 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2946 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2947 { 2948 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2949 PetscInt n = A->rmap->n; 2950 PetscInt cstart = A->cmap->rstart; 2951 PetscInt *cmap = mat->garray; 2952 PetscInt *diagIdx, *offdiagIdx; 2953 Vec diagV, offdiagV; 2954 PetscScalar *a, *diagA, *offdiagA; 2955 PetscInt r; 2956 PetscErrorCode ierr; 2957 2958 PetscFunctionBegin; 2959 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2960 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2961 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2962 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2963 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2964 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2965 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2966 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2967 for(r = 0; r < n; ++r) { 2968 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2969 a[r] = diagA[r]; 2970 idx[r] = cstart + diagIdx[r]; 2971 } else { 2972 a[r] = offdiagA[r]; 2973 idx[r] = cmap[offdiagIdx[r]]; 2974 } 2975 } 2976 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2977 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2978 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2979 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2980 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2981 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2982 PetscFunctionReturn(0); 2983 } 2984 2985 #undef __FUNCT__ 2986 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2987 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2988 { 2989 PetscErrorCode ierr; 2990 Mat *dummy; 2991 2992 PetscFunctionBegin; 2993 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2994 *newmat = *dummy; 2995 ierr = PetscFree(dummy);CHKERRQ(ierr); 2996 PetscFunctionReturn(0); 2997 } 2998 2999 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3000 3001 #undef __FUNCT__ 
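/*
   MatInvertBlockDiagonal_MPIAIJ below only needs to look at the local diagonal block a->A:
   for the usual square MPIAIJ layout every diagonal entry of a locally owned row is stored in
   that SeqAIJ block, so inverting its (point) diagonal blocks gives the global result for the
   owned rows.  A hedged usage sketch (names illustrative):

       const PetscScalar *idiag;
       ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr);   (idiag points at the inverted diagonal blocks)
*/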
3002 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3003 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3004 { 3005 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3006 PetscErrorCode ierr; 3007 3008 PetscFunctionBegin; 3009 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3010 PetscFunctionReturn(0); 3011 } 3012 3013 #undef __FUNCT__ 3014 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3015 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3016 { 3017 PetscErrorCode ierr; 3018 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3019 3020 PetscFunctionBegin; 3021 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3022 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3023 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3024 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 /* -------------------------------------------------------------------*/ 3029 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3030 MatGetRow_MPIAIJ, 3031 MatRestoreRow_MPIAIJ, 3032 MatMult_MPIAIJ, 3033 /* 4*/ MatMultAdd_MPIAIJ, 3034 MatMultTranspose_MPIAIJ, 3035 MatMultTransposeAdd_MPIAIJ, 3036 #ifdef PETSC_HAVE_PBGL 3037 MatSolve_MPIAIJ, 3038 #else 3039 0, 3040 #endif 3041 0, 3042 0, 3043 /*10*/ 0, 3044 0, 3045 0, 3046 MatSOR_MPIAIJ, 3047 MatTranspose_MPIAIJ, 3048 /*15*/ MatGetInfo_MPIAIJ, 3049 MatEqual_MPIAIJ, 3050 MatGetDiagonal_MPIAIJ, 3051 MatDiagonalScale_MPIAIJ, 3052 MatNorm_MPIAIJ, 3053 /*20*/ MatAssemblyBegin_MPIAIJ, 3054 MatAssemblyEnd_MPIAIJ, 3055 MatSetOption_MPIAIJ, 3056 MatZeroEntries_MPIAIJ, 3057 /*24*/ MatZeroRows_MPIAIJ, 3058 0, 3059 #ifdef PETSC_HAVE_PBGL 3060 0, 3061 #else 3062 0, 3063 #endif 3064 0, 3065 0, 3066 /*29*/ MatSetUp_MPIAIJ, 3067 #ifdef PETSC_HAVE_PBGL 3068 0, 3069 #else 3070 0, 3071 #endif 3072 0, 3073 0, 3074 0, 3075 /*34*/ MatDuplicate_MPIAIJ, 3076 0, 3077 0, 3078 0, 3079 0, 3080 /*39*/ MatAXPY_MPIAIJ, 3081 MatGetSubMatrices_MPIAIJ, 3082 MatIncreaseOverlap_MPIAIJ, 3083 MatGetValues_MPIAIJ, 3084 MatCopy_MPIAIJ, 3085 /*44*/ MatGetRowMax_MPIAIJ, 3086 MatScale_MPIAIJ, 3087 0, 3088 0, 3089 MatZeroRowsColumns_MPIAIJ, 3090 /*49*/ MatSetRandom_MPIAIJ, 3091 0, 3092 0, 3093 0, 3094 0, 3095 /*54*/ MatFDColoringCreate_MPIAIJ, 3096 0, 3097 MatSetUnfactored_MPIAIJ, 3098 0, /* MatPermute_MPIAIJ, impl currently broken */ 3099 0, 3100 /*59*/ MatGetSubMatrix_MPIAIJ, 3101 MatDestroy_MPIAIJ, 3102 MatView_MPIAIJ, 3103 0, 3104 0, 3105 /*64*/ 0, 3106 0, 3107 0, 3108 0, 3109 0, 3110 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3111 MatGetRowMinAbs_MPIAIJ, 3112 0, 3113 MatSetColoring_MPIAIJ, 3114 #if defined(PETSC_HAVE_ADIC) 3115 MatSetValuesAdic_MPIAIJ, 3116 #else 3117 0, 3118 #endif 3119 MatSetValuesAdifor_MPIAIJ, 3120 /*75*/ MatFDColoringApply_AIJ, 3121 0, 3122 0, 3123 0, 3124 MatFindZeroDiagonals_MPIAIJ, 3125 /*80*/ 0, 3126 0, 3127 0, 3128 /*83*/ MatLoad_MPIAIJ, 3129 0, 3130 0, 3131 0, 3132 0, 3133 0, 3134 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3135 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3136 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3137 MatPtAP_Basic, 3138 MatPtAPSymbolic_MPIAIJ, 3139 /*94*/ MatPtAPNumeric_MPIAIJ, 3140 0, 3141 0, 3142 0, 3143 0, 3144 /*99*/ 0, 3145 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3146 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3147 MatConjugate_MPIAIJ, 3148 0, 3149 /*104*/MatSetValuesRow_MPIAIJ, 3150 MatRealPart_MPIAIJ, 3151 MatImaginaryPart_MPIAIJ, 3152 0, 3153 0, 3154 /*109*/0, 3155 MatGetRedundantMatrix_MPIAIJ, 3156 MatGetRowMin_MPIAIJ, 3157 0, 3158 0, 3159 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3160 0, 3161 0, 3162 0, 3163 0, 
3164 /*119*/0, 3165 0, 3166 0, 3167 0, 3168 MatGetMultiProcBlock_MPIAIJ, 3169 /*124*/MatFindNonzeroRows_MPIAIJ, 3170 MatGetColumnNorms_MPIAIJ, 3171 MatInvertBlockDiagonal_MPIAIJ, 3172 0, 3173 MatGetSubMatricesParallel_MPIAIJ, 3174 /*129*/0, 3175 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3176 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3177 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3178 0, 3179 /*134*/0, 3180 0, 3181 0, 3182 0, 3183 0 3184 }; 3185 3186 /* ----------------------------------------------------------------------------------------*/ 3187 3188 EXTERN_C_BEGIN 3189 #undef __FUNCT__ 3190 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3191 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3192 { 3193 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3194 PetscErrorCode ierr; 3195 3196 PetscFunctionBegin; 3197 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3198 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3199 PetscFunctionReturn(0); 3200 } 3201 EXTERN_C_END 3202 3203 EXTERN_C_BEGIN 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3206 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3207 { 3208 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3209 PetscErrorCode ierr; 3210 3211 PetscFunctionBegin; 3212 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3213 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3214 PetscFunctionReturn(0); 3215 } 3216 EXTERN_C_END 3217 3218 EXTERN_C_BEGIN 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3221 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3222 { 3223 Mat_MPIAIJ *b; 3224 PetscErrorCode ierr; 3225 PetscInt i; 3226 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3227 3228 PetscFunctionBegin; 3229 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3230 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3231 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3232 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3233 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3234 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3235 3236 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3237 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3238 if (d_nnz) { 3239 for (i=0; i<B->rmap->n; i++) { 3240 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3241 } 3242 } 3243 if (o_nnz) { 3244 for (i=0; i<B->rmap->n; i++) { 3245 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3246 } 3247 } 3248 b = (Mat_MPIAIJ*)B->data; 3249 3250 if (!B->preallocated) { 3251 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3252 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3253 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3254 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3255 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3256 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3257 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3258 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3259 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3260 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3261 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3262 } 3263 3264 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3265 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3266 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3267 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3268 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3269 B->preallocated = PETSC_TRUE; 3270 PetscFunctionReturn(0); 3271 } 3272 EXTERN_C_END 3273 3274 #undef __FUNCT__ 3275 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3276 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3277 { 3278 Mat mat; 3279 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3280 PetscErrorCode ierr; 3281 3282 PetscFunctionBegin; 3283 *newmat = 0; 3284 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3285 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3286 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3287 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3288 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3289 a = (Mat_MPIAIJ*)mat->data; 3290 3291 mat->factortype = matin->factortype; 3292 mat->rmap->bs = matin->rmap->bs; 3293 mat->cmap->bs = matin->cmap->bs; 3294 mat->assembled = PETSC_TRUE; 3295 mat->insertmode = NOT_SET_VALUES; 3296 mat->preallocated = PETSC_TRUE; 3297 3298 a->size = oldmat->size; 3299 a->rank = oldmat->rank; 3300 a->donotstash = oldmat->donotstash; 3301 a->roworiented = oldmat->roworiented; 3302 a->rowindices = 0; 3303 a->rowvalues = 0; 3304 a->getrowactive = PETSC_FALSE; 3305 3306 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3307 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3308 3309 if (oldmat->colmap) { 3310 #if defined (PETSC_USE_CTABLE) 3311 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3312 #else 3313 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3314 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3315 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3316 #endif 3317 } else a->colmap = 0; 3318 if (oldmat->garray) { 3319 PetscInt len; 3320 len = oldmat->B->cmap->n; 3321 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3322 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3323 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3324 } else a->garray = 0; 3325 3326 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3327 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3328 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3329 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3330 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3331 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3332 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3333 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3334 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3335 *newmat = mat; 3336 PetscFunctionReturn(0); 3337 } 3338 3339 3340 3341 #undef __FUNCT__ 3342 #define __FUNCT__ "MatLoad_MPIAIJ" 3343 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3344 { 3345 PetscScalar *vals,*svals; 3346 MPI_Comm comm = ((PetscObject)viewer)->comm; 3347 PetscErrorCode ierr; 3348 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3349 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3350 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3351 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3352 PetscInt cend,cstart,n,*rowners,sizesset=1; 3353 int fd; 3354 PetscInt bs = 1; 3355 3356 PetscFunctionBegin; 3357 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3358 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3359 if (!rank) { 3360 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3361 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3362 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3363 } 3364 3365 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3366 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3367 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3368 3369 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3370 3371 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3372 M = header[1]; N = header[2]; 3373 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3374 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3375 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3376 3377 /* If global sizes are set, check if they are consistent with that given in the file */ 3378 if (sizesset) { 3379 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3380 } 3381 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3382 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3383 3384 /* determine ownership of all (block) rows */ 3385 if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3386 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3387 else m = newMat->rmap->n; /* Set by user */ 3388 3389 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3390 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3391 3392 /* First process needs enough room for process with most rows */ 3393 if (!rank) { 3394 mmax = rowners[1]; 3395 for (i=2; i<size; i++) { 3396 
mmax = PetscMax(mmax,rowners[i]); 3397 } 3398 } else mmax = m; 3399 3400 rowners[0] = 0; 3401 for (i=2; i<=size; i++) { 3402 rowners[i] += rowners[i-1]; 3403 } 3404 rstart = rowners[rank]; 3405 rend = rowners[rank+1]; 3406 3407 /* distribute row lengths to all processors */ 3408 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3409 if (!rank) { 3410 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3411 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3412 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3413 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3414 for (j=0; j<m; j++) { 3415 procsnz[0] += ourlens[j]; 3416 } 3417 for (i=1; i<size; i++) { 3418 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3419 /* calculate the number of nonzeros on each processor */ 3420 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3421 procsnz[i] += rowlengths[j]; 3422 } 3423 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3424 } 3425 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3426 } else { 3427 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3428 } 3429 3430 if (!rank) { 3431 /* determine max buffer needed and allocate it */ 3432 maxnz = 0; 3433 for (i=0; i<size; i++) { 3434 maxnz = PetscMax(maxnz,procsnz[i]); 3435 } 3436 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3437 3438 /* read in my part of the matrix column indices */ 3439 nz = procsnz[0]; 3440 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3441 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3442 3443 /* read in every one elses and ship off */ 3444 for (i=1; i<size; i++) { 3445 nz = procsnz[i]; 3446 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3447 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3448 } 3449 ierr = PetscFree(cols);CHKERRQ(ierr); 3450 } else { 3451 /* determine buffer space needed for message */ 3452 nz = 0; 3453 for (i=0; i<m; i++) { 3454 nz += ourlens[i]; 3455 } 3456 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3457 3458 /* receive message of column indices*/ 3459 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3460 } 3461 3462 /* determine column ownership if matrix is not square */ 3463 if (N != M) { 3464 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3465 else n = newMat->cmap->n; 3466 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3467 cstart = cend - n; 3468 } else { 3469 cstart = rstart; 3470 cend = rend; 3471 n = cend - cstart; 3472 } 3473 3474 /* loop over local rows, determining number of off diagonal entries */ 3475 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3476 jj = 0; 3477 for (i=0; i<m; i++) { 3478 for (j=0; j<ourlens[i]; j++) { 3479 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3480 jj++; 3481 } 3482 } 3483 3484 for (i=0; i<m; i++) { 3485 ourlens[i] -= offlens[i]; 3486 } 3487 if (!sizesset) { 3488 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3489 } 3490 3491 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3492 3493 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3494 3495 for (i=0; i<m; i++) { 3496 ourlens[i] += offlens[i]; 3497 } 3498 3499 if (!rank) { 3500 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3501 3502 /* read in my part of the matrix numerical values */ 3503 
nz = procsnz[0]; 3504 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3505 3506 /* insert into matrix */ 3507 jj = rstart; 3508 smycols = mycols; 3509 svals = vals; 3510 for (i=0; i<m; i++) { 3511 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3512 smycols += ourlens[i]; 3513 svals += ourlens[i]; 3514 jj++; 3515 } 3516 3517 /* read in other processors and ship out */ 3518 for (i=1; i<size; i++) { 3519 nz = procsnz[i]; 3520 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3521 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3522 } 3523 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3524 } else { 3525 /* receive numeric values */ 3526 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3527 3528 /* receive message of values*/ 3529 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3530 3531 /* insert into matrix */ 3532 jj = rstart; 3533 smycols = mycols; 3534 svals = vals; 3535 for (i=0; i<m; i++) { 3536 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3537 smycols += ourlens[i]; 3538 svals += ourlens[i]; 3539 jj++; 3540 } 3541 } 3542 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3543 ierr = PetscFree(vals);CHKERRQ(ierr); 3544 ierr = PetscFree(mycols);CHKERRQ(ierr); 3545 ierr = PetscFree(rowners);CHKERRQ(ierr); 3546 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3547 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3548 3549 PetscFunctionReturn(0); 3550 } 3551 3552 #undef __FUNCT__ 3553 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3554 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3555 { 3556 PetscErrorCode ierr; 3557 IS iscol_local; 3558 PetscInt csize; 3559 3560 PetscFunctionBegin; 3561 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3562 if (call == MAT_REUSE_MATRIX) { 3563 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3564 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3565 } else { 3566 PetscInt cbs; 3567 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3568 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3569 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3570 } 3571 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3572 if (call == MAT_INITIAL_MATRIX) { 3573 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3574 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3575 } 3576 PetscFunctionReturn(0); 3577 } 3578 3579 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3580 #undef __FUNCT__ 3581 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3582 /* 3583 Not great since it makes two copies of the submatrix, first an SeqAIJ 3584 in local and then by concatenating the local matrices the end result. 3585 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3586 3587 Note: This requires a sequential iscol with all indices. 
3588 */ 3589 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3590 { 3591 PetscErrorCode ierr; 3592 PetscMPIInt rank,size; 3593 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3594 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3595 PetscBool allcolumns, colflag; 3596 Mat M,Mreuse; 3597 MatScalar *vwork,*aa; 3598 MPI_Comm comm = ((PetscObject)mat)->comm; 3599 Mat_SeqAIJ *aij; 3600 3601 3602 PetscFunctionBegin; 3603 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3604 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3605 3606 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3607 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3608 if (colflag && ncol == mat->cmap->N){ 3609 allcolumns = PETSC_TRUE; 3610 } else { 3611 allcolumns = PETSC_FALSE; 3612 } 3613 if (call == MAT_REUSE_MATRIX) { 3614 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3615 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3616 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3617 } else { 3618 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3619 } 3620 3621 /* 3622 m - number of local rows 3623 n - number of columns (same on all processors) 3624 rstart - first row in new global matrix generated 3625 */ 3626 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3627 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3628 if (call == MAT_INITIAL_MATRIX) { 3629 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3630 ii = aij->i; 3631 jj = aij->j; 3632 3633 /* 3634 Determine the number of non-zeros in the diagonal and off-diagonal 3635 portions of the matrix in order to do correct preallocation 3636 */ 3637 3638 /* first get start and end of "diagonal" columns */ 3639 if (csize == PETSC_DECIDE) { 3640 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3641 if (mglobal == n) { /* square matrix */ 3642 nlocal = m; 3643 } else { 3644 nlocal = n/size + ((n % size) > rank); 3645 } 3646 } else { 3647 nlocal = csize; 3648 } 3649 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3650 rstart = rend - nlocal; 3651 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3652 3653 /* next, compute all the lengths */ 3654 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3655 olens = dlens + m; 3656 for (i=0; i<m; i++) { 3657 jend = ii[i+1] - ii[i]; 3658 olen = 0; 3659 dlen = 0; 3660 for (j=0; j<jend; j++) { 3661 if (*jj < rstart || *jj >= rend) olen++; 3662 else dlen++; 3663 jj++; 3664 } 3665 olens[i] = olen; 3666 dlens[i] = dlen; 3667 } 3668 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3669 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3670 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3671 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3672 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3673 ierr = PetscFree(dlens);CHKERRQ(ierr); 3674 } else { 3675 PetscInt ml,nl; 3676 3677 M = *newmat; 3678 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3679 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3680 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3681 /* 3682 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
3683       rather than the slower MatSetValues().
3684     */
3685     M->was_assembled = PETSC_TRUE;
3686     M->assembled     = PETSC_FALSE;
3687   }
3688   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3689   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3690   ii   = aij->i;
3691   jj   = aij->j;
3692   aa   = aij->a;
3693   for (i=0; i<m; i++) {
3694     row   = rstart + i;
3695     nz    = ii[i+1] - ii[i];
3696     cwork = jj;     jj += nz;
3697     vwork = aa;     aa += nz;
3698     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3699   }
3700 
3701   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3702   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3703   *newmat = M;
3704 
3705   /* save submatrix used in processor for next request */
3706   if (call == MAT_INITIAL_MATRIX) {
3707     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3708     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3709   }
3710 
3711   PetscFunctionReturn(0);
3712 }
3713 
3714 EXTERN_C_BEGIN
3715 #undef __FUNCT__
3716 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3717 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3718 {
3719   PetscInt       m,cstart, cend,j,nnz,i,d;
3720   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3721   const PetscInt *JJ;
3722   PetscScalar    *values;
3723   PetscErrorCode ierr;
3724 
3725   PetscFunctionBegin;
3726   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3727 
3728   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3729   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3730   m      = B->rmap->n;
3731   cstart = B->cmap->rstart;
3732   cend   = B->cmap->rend;
3733   rstart = B->rmap->rstart;
3734 
3735   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3736 
3737 #if defined(PETSC_USE_DEBUG)
3738   for (i=0; i<m; i++) {
3739     nnz = Ii[i+1]- Ii[i];
3740     JJ  = J + Ii[i];
3741     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3742     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3743     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3744   }
3745 #endif
3746 
3747   for (i=0; i<m; i++) {
3748     nnz     = Ii[i+1]- Ii[i];
3749     JJ      = J + Ii[i];
3750     nnz_max = PetscMax(nnz_max,nnz);
3751     d       = 0;
3752     for (j=0; j<nnz; j++) {
3753       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3754     }
3755     d_nnz[i] = d;
3756     o_nnz[i] = nnz - d;
3757   }
3758   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3759   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3760 
3761   if (v) values = (PetscScalar*)v;
3762   else {
3763     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3764     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3765   }
3766 
3767   for (i=0; i<m; i++) {
3768     ii   = i + rstart;
3769     nnz  = Ii[i+1]- Ii[i];
3770     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3771   }
3772   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3773   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3774 
3775   if (!v) {
3776     ierr = PetscFree(values);CHKERRQ(ierr);
3777   }
3778   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3779   PetscFunctionReturn(0);
3780 }
3781 EXTERN_C_END
3782 
3783 #undef __FUNCT__
3784 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3785 /*@
3786    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3787    (the default parallel PETSc format).
3788 
3789    Collective on MPI_Comm
3790 
3791    Input Parameters:
3792 +  B - the matrix
3793 .  i - the indices into j for the start of each local row (starts with zero)
3794 .  j - the column indices for each local row (starts with zero)
3795 -  v - optional values in the matrix
3796 
3797    Level: developer
3798 
3799    Notes:
3800        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3801      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3802      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3803 
3804        The i and j indices are 0 based, and the entries of i are offsets into the local j (and v) arrays.
3805 
3806        The format used for the sparse matrix input is equivalent to a
3807     row-major ordering, i.e. for the following matrix, the input data expected is
3808     as shown:
3809 
3810         1 0 0
3811         2 0 3   P0
3812        -------
3813         4 5 6   P1
3814 
3815      Process0 [P0]: rows_owned=[0,1]
3816        i =  {0,1,3}  [size = nrow+1 = 2+1]
3817        j =  {0,0,2}  [size = nz = 3]
3818        v =  {1,2,3}  [size = nz = 3]
3819 
3820      Process1 [P1]: rows_owned=[2]
3821        i =  {0,3}    [size = nrow+1 = 1+1]
3822        j =  {0,1,2}  [size = nz = 3]
3823        v =  {4,5,6}  [size = nz = 3]
3824 
3825 .keywords: matrix, aij, compressed row, sparse, parallel
3826 
3827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3828           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3829 @*/
3830 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3831 {
3832   PetscErrorCode ierr;
3833 
3834   PetscFunctionBegin;
3835   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3841 /*@C
3842    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3843    (the default parallel PETSc format).  For good matrix assembly performance
3844    the user should preallocate the matrix storage by setting the parameters
3845    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3846    performance can be increased by more than a factor of 50.
3847 
3848    Collective on MPI_Comm
3849 
3850    Input Parameters:
3851 +  A - the matrix
3852 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3853            (same value is used for all local rows)
3854 .  d_nnz - array containing the number of nonzeros in the various rows of the
3855            DIAGONAL portion of the local submatrix (possibly different for each row)
3856            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3857            The size of this array is equal to the number of local rows, i.e. 'm'.
3858 For matrices that will be factored, you must leave room for (and set) 3859 the diagonal entry even if it is zero. 3860 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3861 submatrix (same value is used for all local rows). 3862 - o_nnz - array containing the number of nonzeros in the various rows of the 3863 OFF-DIAGONAL portion of the local submatrix (possibly different for 3864 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3865 structure. The size of this array is equal to the number 3866 of local rows, i.e 'm'. 3867 3868 If the *_nnz parameter is given then the *_nz parameter is ignored 3869 3870 The AIJ format (also called the Yale sparse matrix format or 3871 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3872 storage. The stored row and column indices begin with zero. 3873 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3874 3875 The parallel matrix is partitioned such that the first m0 rows belong to 3876 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3877 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3878 3879 The DIAGONAL portion of the local submatrix of a processor can be defined 3880 as the submatrix which is obtained by extraction the part corresponding to 3881 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3882 first row that belongs to the processor, r2 is the last row belonging to 3883 the this processor, and c1-c2 is range of indices of the local part of a 3884 vector suitable for applying the matrix to. This is an mxn matrix. In the 3885 common case of a square matrix, the row and column ranges are the same and 3886 the DIAGONAL part is also square. The remaining portion of the local 3887 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3888 3889 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3890 3891 You can call MatGetInfo() to get information on how effective the preallocation was; 3892 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3893 You can also run with the option -info and look for messages with the string 3894 malloc in them to see if additional memory allocation was needed. 3895 3896 Example usage: 3897 3898 Consider the following 8x8 matrix with 34 non-zero values, that is 3899 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3900 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3901 as follows: 3902 3903 .vb 3904 1 2 0 | 0 3 0 | 0 4 3905 Proc0 0 5 6 | 7 0 0 | 8 0 3906 9 0 10 | 11 0 0 | 12 0 3907 ------------------------------------- 3908 13 0 14 | 15 16 17 | 0 0 3909 Proc1 0 18 0 | 19 20 21 | 0 0 3910 0 0 0 | 22 23 0 | 24 0 3911 ------------------------------------- 3912 Proc2 25 26 27 | 0 0 28 | 29 0 3913 30 0 0 | 31 32 33 | 0 34 3914 .ve 3915 3916 This can be represented as a collection of submatrices as: 3917 3918 .vb 3919 A B C 3920 D E F 3921 G H I 3922 .ve 3923 3924 Where the submatrices A,B,C are owned by proc0, D,E,F are 3925 owned by proc1, G,H,I are owned by proc2. 3926 3927 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3928 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3929 The 'M','N' parameters are 8,8, and have the same values on all procs. 3930 3931 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3932 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3933 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3934 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3935 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3936 matrix, ans [DF] as another SeqAIJ matrix. 3937 3938 When d_nz, o_nz parameters are specified, d_nz storage elements are 3939 allocated for every row of the local diagonal submatrix, and o_nz 3940 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3941 One way to choose d_nz and o_nz is to use the max nonzerors per local 3942 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3943 In this case, the values of d_nz,o_nz are: 3944 .vb 3945 proc0 : dnz = 2, o_nz = 2 3946 proc1 : dnz = 3, o_nz = 2 3947 proc2 : dnz = 1, o_nz = 4 3948 .ve 3949 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3950 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3951 for proc3. i.e we are using 12+15+10=37 storage locations to store 3952 34 values. 3953 3954 When d_nnz, o_nnz parameters are specified, the storage is specified 3955 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3956 In the above case the values for d_nnz,o_nnz are: 3957 .vb 3958 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3959 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3960 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3961 .ve 3962 Here the space allocated is sum of all the above values i.e 34, and 3963 hence pre-allocation is perfect. 3964 3965 Level: intermediate 3966 3967 .keywords: matrix, aij, compressed row, sparse, parallel 3968 3969 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3970 MPIAIJ, MatGetInfo() 3971 @*/ 3972 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3973 { 3974 PetscErrorCode ierr; 3975 3976 PetscFunctionBegin; 3977 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3978 PetscValidType(B,1); 3979 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3980 PetscFunctionReturn(0); 3981 } 3982 3983 #undef __FUNCT__ 3984 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3985 /*@ 3986 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3987 CSR format the local rows. 3988 3989 Collective on MPI_Comm 3990 3991 Input Parameters: 3992 + comm - MPI communicator 3993 . m - number of local rows (Cannot be PETSC_DECIDE) 3994 . n - This value should be the same as the local size used in creating the 3995 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3996 calculated if N is given) For square matrices n is almost always m. 3997 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3998 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3999 . i - row indices 4000 . j - column indices 4001 - a - matrix values 4002 4003 Output Parameter: 4004 . mat - the matrix 4005 4006 Level: intermediate 4007 4008 Notes: 4009 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4010 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4011 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
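
       As an illustration (a usage sketch added here for clarity, not taken from the PETSc
    example suite; the variable names are made up), process 0 of the 3 x 3 example matrix
    shown below could create its share of the matrix with
.vb
    PetscInt    i[] = {0,1,3};
    PetscInt    j[] = {0,0,2};
    PetscScalar v[] = {1.0,2.0,3.0};
    Mat         A;

    MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
    while process 1 would make the same call with m = 1 and its own i = {0,3}, j = {0,1,2},
    v = {4.0,5.0,6.0}.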
4012 4013 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4014 4015 The format which is used for the sparse matrix input, is equivalent to a 4016 row-major ordering.. i.e for the following matrix, the input data expected is 4017 as shown: 4018 4019 1 0 0 4020 2 0 3 P0 4021 ------- 4022 4 5 6 P1 4023 4024 Process0 [P0]: rows_owned=[0,1] 4025 i = {0,1,3} [size = nrow+1 = 2+1] 4026 j = {0,0,2} [size = nz = 6] 4027 v = {1,2,3} [size = nz = 6] 4028 4029 Process1 [P1]: rows_owned=[2] 4030 i = {0,3} [size = nrow+1 = 1+1] 4031 j = {0,1,2} [size = nz = 6] 4032 v = {4,5,6} [size = nz = 6] 4033 4034 .keywords: matrix, aij, compressed row, sparse, parallel 4035 4036 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4037 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4038 @*/ 4039 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4040 { 4041 PetscErrorCode ierr; 4042 4043 PetscFunctionBegin; 4044 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4045 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4046 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4047 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4048 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4049 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4050 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4051 PetscFunctionReturn(0); 4052 } 4053 4054 #undef __FUNCT__ 4055 #define __FUNCT__ "MatCreateAIJ" 4056 /*@C 4057 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4058 (the default parallel PETSc format). For good matrix assembly performance 4059 the user should preallocate the matrix storage by setting the parameters 4060 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4061 performance can be increased by more than a factor of 50. 4062 4063 Collective on MPI_Comm 4064 4065 Input Parameters: 4066 + comm - MPI communicator 4067 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4068 This value should be the same as the local size used in creating the 4069 y vector for the matrix-vector product y = Ax. 4070 . n - This value should be the same as the local size used in creating the 4071 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4072 calculated if N is given) For square matrices n is almost always m. 4073 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4074 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4075 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4076 (same value is used for all local rows) 4077 . d_nnz - array containing the number of nonzeros in the various rows of the 4078 DIAGONAL portion of the local submatrix (possibly different for each row) 4079 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4080 The size of this array is equal to the number of local rows, i.e 'm'. 4081 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4082 submatrix (same value is used for all local rows). 
4083 - o_nnz - array containing the number of nonzeros in the various rows of the 4084 OFF-DIAGONAL portion of the local submatrix (possibly different for 4085 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4086 structure. The size of this array is equal to the number 4087 of local rows, i.e 'm'. 4088 4089 Output Parameter: 4090 . A - the matrix 4091 4092 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4093 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4094 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4095 4096 Notes: 4097 If the *_nnz parameter is given then the *_nz parameter is ignored 4098 4099 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4100 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4101 storage requirements for this matrix. 4102 4103 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4104 processor than it must be used on all processors that share the object for 4105 that argument. 4106 4107 The user MUST specify either the local or global matrix dimensions 4108 (possibly both). 4109 4110 The parallel matrix is partitioned across processors such that the 4111 first m0 rows belong to process 0, the next m1 rows belong to 4112 process 1, the next m2 rows belong to process 2 etc.. where 4113 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4114 values corresponding to [m x N] submatrix. 4115 4116 The columns are logically partitioned with the n0 columns belonging 4117 to 0th partition, the next n1 columns belonging to the next 4118 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4119 4120 The DIAGONAL portion of the local submatrix on any given processor 4121 is the submatrix corresponding to the rows and columns m,n 4122 corresponding to the given processor. i.e diagonal matrix on 4123 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4124 etc. The remaining portion of the local submatrix [m x (N-n)] 4125 constitute the OFF-DIAGONAL portion. The example below better 4126 illustrates this concept. 4127 4128 For a square global matrix we define each processor's diagonal portion 4129 to be its local rows and the corresponding columns (a square submatrix); 4130 each processor's off-diagonal portion encompasses the remainder of the 4131 local matrix (a rectangular submatrix). 4132 4133 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4134 4135 When calling this routine with a single process communicator, a matrix of 4136 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4137 type of communicator, use the construction mechanism: 4138 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4139 4140 By default, this format uses inodes (identical nodes) when possible. 4141 We search for consecutive rows with the same nonzero structure, thereby 4142 reusing matrix information to achieve increased efficiency. 4143 4144 Options Database Keys: 4145 + -mat_no_inode - Do not use inodes 4146 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4147 - -mat_aij_oneindex - Internally use indexing starting at 1 4148 rather than 0. Note that when calling MatSetValues(), 4149 the user still MUST index entries starting at 0! 4150 4151 4152 Example usage: 4153 4154 Consider the following 8x8 matrix with 34 non-zero values, that is 4155 assembled across 3 processors. 
Let us assume that proc0 owns 3 rows,
4156    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4157    as follows:
4158 
4159 .vb
4160             1  2  0  |  0  3  0  |  0  4
4161    Proc0    0  5  6  |  7  0  0  |  8  0
4162             9  0 10  | 11  0  0  | 12  0
4163    -------------------------------------
4164            13  0 14  | 15 16 17  |  0  0
4165    Proc1    0 18  0  | 19 20 21  |  0  0
4166             0  0  0  | 22 23  0  | 24  0
4167    -------------------------------------
4168    Proc2   25 26 27  |  0  0 28  | 29  0
4169            30  0  0  | 31 32 33  |  0 34
4170 .ve
4171 
4172    This can be represented as a collection of submatrices as:
4173 
4174 .vb
4175       A B C
4176       D E F
4177       G H I
4178 .ve
4179 
4180    Where the submatrices A,B,C are owned by proc0, D,E,F are
4181    owned by proc1, G,H,I are owned by proc2.
4182 
4183    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4184    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4185    The 'M','N' parameters are 8,8, and have the same values on all procs.
4186 
4187    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4188    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4189    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4190    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4191    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4192    matrix, and [DF] as another SeqAIJ matrix.
4193 
4194    When d_nz, o_nz parameters are specified, d_nz storage elements are
4195    allocated for every row of the local DIAGONAL submatrix, and o_nz
4196    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4197    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4198    the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4199    In this case, the values of d_nz,o_nz are:
4200 .vb
4201      proc0 : d_nz = 2, o_nz = 2
4202      proc1 : d_nz = 3, o_nz = 2
4203      proc2 : d_nz = 1, o_nz = 4
4204 .ve
4205    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4206    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4207    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4208    34 values.
4209 
4210    When d_nnz, o_nnz parameters are specified, the storage is specified
4211    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4212    In the above case the values for d_nnz,o_nnz are:
4213 .vb
4214      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4215      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4216      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4217 .ve
4218    Here the space allocated is the sum of all the above values, i.e. 34, and
4219    hence pre-allocation is perfect.
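
   A possible calling sequence for the example above, sketched for proc0 only (the other
   processes pass their own local sizes and nnz arrays; the variable names are illustrative), is
.vb
   Mat      A;
   PetscInt d_nnz[3] = {2,2,2};   /* proc0 per-row nonzeros in the DIAGONAL block     */
   PetscInt o_nnz[3] = {2,2,2};   /* proc0 per-row nonzeros in the OFF-DIAGONAL block */

   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
   Since d_nnz and o_nnz are given, the d_nz and o_nz arguments (passed as 0 here) are ignored.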
4220 4221 Level: intermediate 4222 4223 .keywords: matrix, aij, compressed row, sparse, parallel 4224 4225 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4226 MPIAIJ, MatCreateMPIAIJWithArrays() 4227 @*/ 4228 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4229 { 4230 PetscErrorCode ierr; 4231 PetscMPIInt size; 4232 4233 PetscFunctionBegin; 4234 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4235 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4236 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4237 if (size > 1) { 4238 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4239 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4240 } else { 4241 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4242 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4243 } 4244 PetscFunctionReturn(0); 4245 } 4246 4247 #undef __FUNCT__ 4248 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4249 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4250 { 4251 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4252 4253 PetscFunctionBegin; 4254 *Ad = a->A; 4255 *Ao = a->B; 4256 *colmap = a->garray; 4257 PetscFunctionReturn(0); 4258 } 4259 4260 #undef __FUNCT__ 4261 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4262 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4263 { 4264 PetscErrorCode ierr; 4265 PetscInt i; 4266 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4267 4268 PetscFunctionBegin; 4269 if (coloring->ctype == IS_COLORING_GLOBAL) { 4270 ISColoringValue *allcolors,*colors; 4271 ISColoring ocoloring; 4272 4273 /* set coloring for diagonal portion */ 4274 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4275 4276 /* set coloring for off-diagonal portion */ 4277 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4278 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4279 for (i=0; i<a->B->cmap->n; i++) { 4280 colors[i] = allcolors[a->garray[i]]; 4281 } 4282 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4283 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4284 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4285 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4286 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4287 ISColoringValue *colors; 4288 PetscInt *larray; 4289 ISColoring ocoloring; 4290 4291 /* set coloring for diagonal portion */ 4292 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4293 for (i=0; i<a->A->cmap->n; i++) { 4294 larray[i] = i + A->cmap->rstart; 4295 } 4296 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4297 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4298 for (i=0; i<a->A->cmap->n; i++) { 4299 colors[i] = coloring->colors[larray[i]]; 4300 } 4301 ierr = PetscFree(larray);CHKERRQ(ierr); 4302 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4303 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4304 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4305 4306 /* set coloring for off-diagonal portion */ 4307 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4308 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4309 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4310 for (i=0; i<a->B->cmap->n; i++) { 4311 colors[i] = coloring->colors[larray[i]]; 4312 } 4313 ierr = PetscFree(larray);CHKERRQ(ierr); 4314 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4315 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4316 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4317 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4318 4319 PetscFunctionReturn(0); 4320 } 4321 4322 #if defined(PETSC_HAVE_ADIC) 4323 #undef __FUNCT__ 4324 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4325 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4326 { 4327 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4328 PetscErrorCode ierr; 4329 4330 PetscFunctionBegin; 4331 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4332 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4333 PetscFunctionReturn(0); 4334 } 4335 #endif 4336 4337 #undef __FUNCT__ 4338 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4339 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4340 { 4341 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4342 PetscErrorCode ierr; 4343 4344 PetscFunctionBegin; 4345 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4346 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4347 PetscFunctionReturn(0); 4348 } 4349 4350 #undef __FUNCT__ 4351 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4352 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4353 { 4354 PetscErrorCode ierr; 4355 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4356 PetscInt *indx; 4357 4358 PetscFunctionBegin; 4359 /* This routine will ONLY return MPIAIJ type matrix */ 4360 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4361 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4362 if (n == PETSC_DECIDE){ 4363 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4364 } 4365 /* Check sum(n) = N */ 4366 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4367 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4368 4369 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4370 rstart -= m; 4371 4372 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4373 for (i=0;i<m;i++) { 4374 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4375 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4376 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4377 } 4378 4379 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4380 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4381 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4382 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4383 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4384 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4385 PetscFunctionReturn(0); 4386 } 4387 4388 #undef __FUNCT__ 4389 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4390 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4391 { 4392 PetscErrorCode ierr; 4393 PetscInt 
m,N,i,rstart,nnz,Ii; 4394 PetscInt *indx; 4395 PetscScalar *values; 4396 4397 PetscFunctionBegin; 4398 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4399 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4400 for (i=0;i<m;i++) { 4401 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4402 Ii = i + rstart; 4403 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4404 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4405 } 4406 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4407 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4408 PetscFunctionReturn(0); 4409 } 4410 4411 #undef __FUNCT__ 4412 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4413 /*@ 4414 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4415 matrices from each processor 4416 4417 Collective on MPI_Comm 4418 4419 Input Parameters: 4420 + comm - the communicators the parallel matrix will live on 4421 . inmat - the input sequential matrices 4422 . n - number of local columns (or PETSC_DECIDE) 4423 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4424 4425 Output Parameter: 4426 . outmat - the parallel matrix generated 4427 4428 Level: advanced 4429 4430 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4431 4432 @*/ 4433 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4434 { 4435 PetscErrorCode ierr; 4436 4437 PetscFunctionBegin; 4438 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4439 if (scall == MAT_INITIAL_MATRIX){ 4440 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4441 } 4442 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4443 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4444 PetscFunctionReturn(0); 4445 } 4446 4447 #undef __FUNCT__ 4448 #define __FUNCT__ "MatFileSplit" 4449 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4450 { 4451 PetscErrorCode ierr; 4452 PetscMPIInt rank; 4453 PetscInt m,N,i,rstart,nnz; 4454 size_t len; 4455 const PetscInt *indx; 4456 PetscViewer out; 4457 char *name; 4458 Mat B; 4459 const PetscScalar *values; 4460 4461 PetscFunctionBegin; 4462 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4463 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4464 /* Should this be the type of the diagonal block of A? 
*/ 4465 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4466 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4467 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4468 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4469 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4470 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4471 for (i=0;i<m;i++) { 4472 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4473 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4474 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4475 } 4476 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4477 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4478 4479 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4480 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4481 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4482 sprintf(name,"%s.%d",outfile,rank); 4483 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4484 ierr = PetscFree(name); 4485 ierr = MatView(B,out);CHKERRQ(ierr); 4486 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4487 ierr = MatDestroy(&B);CHKERRQ(ierr); 4488 PetscFunctionReturn(0); 4489 } 4490 4491 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4492 #undef __FUNCT__ 4493 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4494 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4495 { 4496 PetscErrorCode ierr; 4497 Mat_Merge_SeqsToMPI *merge; 4498 PetscContainer container; 4499 4500 PetscFunctionBegin; 4501 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4502 if (container) { 4503 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4505 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4507 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4508 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4509 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4510 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4511 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4512 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4513 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4514 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4515 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4516 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4517 ierr = PetscFree(merge);CHKERRQ(ierr); 4518 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4519 } 4520 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4521 PetscFunctionReturn(0); 4522 } 4523 4524 #include <../src/mat/utils/freespace.h> 4525 #include <petscbt.h> 4526 4527 #undef __FUNCT__ 4528 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4529 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4530 { 4531 PetscErrorCode ierr; 4532 MPI_Comm comm=((PetscObject)mpimat)->comm; 4533 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4534 PetscMPIInt size,rank,taga,*len_s; 4535 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4536 PetscInt proc,m; 4537 PetscInt **buf_ri,**buf_rj; 4538 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4539 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4540 MPI_Request *s_waits,*r_waits; 4541 MPI_Status *status; 4542 MatScalar *aa=a->a; 4543 MatScalar **abuf_r,*ba_i; 4544 Mat_Merge_SeqsToMPI *merge; 4545 PetscContainer container; 4546 4547 PetscFunctionBegin; 4548 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4549 4550 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4551 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4552 4553 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4554 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4555 4556 bi = merge->bi; 4557 bj = merge->bj; 4558 buf_ri = merge->buf_ri; 4559 buf_rj = merge->buf_rj; 4560 4561 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4562 owners = merge->rowmap->range; 4563 len_s = merge->len_s; 4564 4565 /* send and recv matrix values */ 4566 /*-----------------------------*/ 4567 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4568 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4569 4570 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4571 for (proc=0,k=0; proc<size; proc++){ 4572 if (!len_s[proc]) continue; 4573 i = owners[proc]; 4574 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4575 k++; 4576 } 4577 4578 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4579 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4580 ierr = PetscFree(status);CHKERRQ(ierr); 4581 4582 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4583 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4584 4585 /* insert mat values of mpimat */ 4586 /*----------------------------*/ 4587 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4588 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4589 4590 for (k=0; k<merge->nrecv; k++){ 4591 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4592 nrows = *(buf_ri_k[k]); 4593 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4594 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4595 } 4596 4597 /* set values of ba */ 4598 m = merge->rowmap->n; 4599 for (i=0; i<m; i++) { 4600 arow = owners[rank] + i; 4601 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4602 bnzi = bi[i+1] - bi[i]; 4603 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4604 4605 /* add local non-zero vals of this proc's seqmat into ba */ 4606 anzi = ai[arow+1] - ai[arow]; 4607 aj = a->j + ai[arow]; 4608 aa = a->a + ai[arow]; 4609 nextaj = 0; 4610 for (j=0; nextaj<anzi; j++){ 4611 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4612 ba_i[j] += aa[nextaj++]; 4613 } 4614 } 4615 4616 /* add received vals into ba */ 4617 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4618 /* i-th row */ 4619 if (i == *nextrow[k]) { 4620 anzi = *(nextai[k]+1) - *nextai[k]; 4621 aj = buf_rj[k] + *(nextai[k]); 4622 aa = abuf_r[k] + *(nextai[k]); 4623 nextaj = 0; 4624 for (j=0; nextaj<anzi; j++){ 4625 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4626 ba_i[j] += aa[nextaj++]; 4627 } 4628 } 4629 nextrow[k]++; nextai[k]++; 4630 } 4631 } 4632 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4633 } 4634 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4635 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4636 4637 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4638 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4639 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4640 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4641 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4642 PetscFunctionReturn(0); 4643 } 4644 4645 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4646 4647 #undef __FUNCT__ 4648 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4649 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4650 { 4651 PetscErrorCode ierr; 4652 Mat B_mpi; 4653 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4654 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4655 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4656 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4657 PetscInt len,proc,*dnz,*onz,bs,cbs; 4658 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4659 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4660 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4661 MPI_Status *status; 4662 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4663 PetscBT lnkbt; 4664 Mat_Merge_SeqsToMPI *merge; 4665 PetscContainer container; 4666 4667 PetscFunctionBegin; 4668 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4669 4670 /* make sure it is a PETSc comm */ 4671 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4672 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4673 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4674 4675 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4676 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4677 4678 /* determine row ownership */ 4679 /*---------------------------------------------------------*/ 4680 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4681 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4682 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4683 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4684 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4685 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4686 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4687 4688 m = merge->rowmap->n; 4689 M = merge->rowmap->N; 4690 owners = merge->rowmap->range; 4691 4692 /* determine the number of messages to send, their lengths */ 4693 /*---------------------------------------------------------*/ 4694 len_s = merge->len_s; 4695 4696 len = 0; /* length of buf_si[] */ 4697 merge->nsend = 0; 4698 for (proc=0; proc<size; proc++){ 4699 len_si[proc] = 0; 4700 if (proc == rank){ 4701 len_s[proc] = 0; 4702 } else { 4703 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4704 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4705 } 4706 if (len_s[proc]) { 4707 merge->nsend++; 4708 nrows = 0; 4709 for (i=owners[proc]; i<owners[proc+1]; i++){ 4710 if (ai[i+1] > ai[i]) nrows++; 4711 } 4712 len_si[proc] = 2*(nrows+1); 4713 len += len_si[proc]; 4714 } 4715 } 4716 4717 /* determine the number and length of messages to receive for ij-structure */ 4718 /*-------------------------------------------------------------------------*/ 4719 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4720 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4721 4722 /* post the Irecv of j-structure */ 4723 /*-------------------------------*/ 4724 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4725 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4726 4727 /* post the Isend of j-structure */ 4728 /*--------------------------------*/ 4729 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4730 4731 for (proc=0, k=0; proc<size; proc++){ 4732 if (!len_s[proc]) continue; 4733 i = owners[proc]; 4734 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4735 k++; 4736 } 4737 4738 /* receives and sends of j-structure are complete */ 4739 /*------------------------------------------------*/ 4740 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4741 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4742 4743 /* send and recv i-structure */ 4744 /*---------------------------*/ 4745 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4746 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4747 4748 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4749 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4750 for (proc=0,k=0; proc<size; proc++){ 4751 if (!len_s[proc]) continue; 4752 /* form outgoing message for i-structure: 4753 buf_si[0]: nrows to be sent 4754 [1:nrows]: row index (global) 4755 [nrows+1:2*nrows+1]: i-structure index 4756 */ 4757 /*-------------------------------------------*/ 4758 nrows = len_si[proc]/2 - 1; 4759 buf_si_i = buf_si + nrows+1; 4760 buf_si[0] = nrows; 4761 buf_si_i[0] = 0; 4762 nrows = 0; 4763 for (i=owners[proc]; i<owners[proc+1]; i++){ 4764 anzi = ai[i+1] - ai[i]; 4765 if (anzi) { 4766 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4767 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4768 nrows++; 4769 } 4770 } 4771 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4772 k++; 4773 buf_si += len_si[proc]; 4774 } 4775 4776 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4777 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4778 4779 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4780 for (i=0; i<merge->nrecv; i++){ 4781 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4782 } 4783 4784 ierr = PetscFree(len_si);CHKERRQ(ierr); 4785 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4786 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4787 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4788 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4789 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4790 ierr = PetscFree(status);CHKERRQ(ierr); 4791 4792 /* compute a local seq matrix in each processor */ 4793 /*----------------------------------------------*/ 4794 /* allocate bi array and free space for accumulating nonzero column info */ 4795 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4796 bi[0] = 0; 4797 4798 /* create and initialize a linked list */ 4799 nlnk = N+1; 4800 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4801 4802 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4803 len = 0; 4804 len = ai[owners[rank+1]] - ai[owners[rank]]; 4805 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4806 current_space = free_space; 4807 4808 /* determine 
symbolic info for each local row */ 4809 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4810 4811 for (k=0; k<merge->nrecv; k++){ 4812 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4813 nrows = *buf_ri_k[k]; 4814 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4815 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4816 } 4817 4818 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4819 len = 0; 4820 for (i=0;i<m;i++) { 4821 bnzi = 0; 4822 /* add local non-zero cols of this proc's seqmat into lnk */ 4823 arow = owners[rank] + i; 4824 anzi = ai[arow+1] - ai[arow]; 4825 aj = a->j + ai[arow]; 4826 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4827 bnzi += nlnk; 4828 /* add received col data into lnk */ 4829 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4830 if (i == *nextrow[k]) { /* i-th row */ 4831 anzi = *(nextai[k]+1) - *nextai[k]; 4832 aj = buf_rj[k] + *nextai[k]; 4833 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4834 bnzi += nlnk; 4835 nextrow[k]++; nextai[k]++; 4836 } 4837 } 4838 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4839 4840 /* if free space is not available, make more free space */ 4841 if (current_space->local_remaining<bnzi) { 4842 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4843 nspacedouble++; 4844 } 4845 /* copy data into free space, then initialize lnk */ 4846 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4847 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4848 4849 current_space->array += bnzi; 4850 current_space->local_used += bnzi; 4851 current_space->local_remaining -= bnzi; 4852 4853 bi[i+1] = bi[i] + bnzi; 4854 } 4855 4856 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4857 4858 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4859 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4860 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4861 4862 /* create symbolic parallel matrix B_mpi */ 4863 /*---------------------------------------*/ 4864 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4865 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4866 if (n==PETSC_DECIDE) { 4867 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4868 } else { 4869 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4870 } 4871 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4872 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4873 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4874 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4875 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4876 4877 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4878 B_mpi->assembled = PETSC_FALSE; 4879 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4880 merge->bi = bi; 4881 merge->bj = bj; 4882 merge->buf_ri = buf_ri; 4883 merge->buf_rj = buf_rj; 4884 merge->coi = PETSC_NULL; 4885 merge->coj = PETSC_NULL; 4886 merge->owners_co = PETSC_NULL; 4887 4888 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4889 4890 /* attach the supporting struct to B_mpi for reuse */ 4891 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4892 ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4893 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4894 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4895 *mpimat = B_mpi;
4896
4897 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4898 PetscFunctionReturn(0);
4899 }
4900
4901 #undef __FUNCT__
4902 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4903 /*@C
4904 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4905 matrices from each processor
4906
4907 Collective on MPI_Comm
4908
4909 Input Parameters:
4910 + comm - the communicator the parallel matrix will live on
4911 . seqmat - the input sequential matrix on each process
4912 . m - number of local rows (or PETSC_DECIDE)
4913 . n - number of local columns (or PETSC_DECIDE)
4914 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4915
4916 Output Parameter:
4917 . mpimat - the parallel matrix generated
4918
4919 Level: advanced
4920
4921 Notes:
4922 The dimensions of the sequential matrix in each processor MUST be the same.
4923 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4924 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4925 @*/
4926 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4927 {
4928 PetscErrorCode ierr;
4929 PetscMPIInt size;
4930
4931 PetscFunctionBegin;
4932 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4933 if (size == 1){
4934 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935 if (scall == MAT_INITIAL_MATRIX){
4936 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4937 } else {
4938 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4939 }
4940 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4941 PetscFunctionReturn(0);
4942 }
4943 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4944 if (scall == MAT_INITIAL_MATRIX){
4945 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4946 }
4947 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4948 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4949 PetscFunctionReturn(0);
4950 }
4951
4952 #undef __FUNCT__
4953 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4954 /*@
4955 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4956 mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4957 with MatGetSize()
4958
4959 Not Collective
4960
4961 Input Parameters:
4962 + A - the matrix
4963 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4964
4965 Output Parameter:
4966 .
A_loc - the local sequential matrix generated 4967 4968 Level: developer 4969 4970 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4971 4972 @*/ 4973 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4974 { 4975 PetscErrorCode ierr; 4976 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4977 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4978 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4979 MatScalar *aa=a->a,*ba=b->a,*cam; 4980 PetscScalar *ca; 4981 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4982 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4983 PetscBool match; 4984 4985 PetscFunctionBegin; 4986 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4987 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4988 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4989 if (scall == MAT_INITIAL_MATRIX){ 4990 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4991 ci[0] = 0; 4992 for (i=0; i<am; i++){ 4993 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4994 } 4995 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4996 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4997 k = 0; 4998 for (i=0; i<am; i++) { 4999 ncols_o = bi[i+1] - bi[i]; 5000 ncols_d = ai[i+1] - ai[i]; 5001 /* off-diagonal portion of A */ 5002 for (jo=0; jo<ncols_o; jo++) { 5003 col = cmap[*bj]; 5004 if (col >= cstart) break; 5005 cj[k] = col; bj++; 5006 ca[k++] = *ba++; 5007 } 5008 /* diagonal portion of A */ 5009 for (j=0; j<ncols_d; j++) { 5010 cj[k] = cstart + *aj++; 5011 ca[k++] = *aa++; 5012 } 5013 /* off-diagonal portion of A */ 5014 for (j=jo; j<ncols_o; j++) { 5015 cj[k] = cmap[*bj++]; 5016 ca[k++] = *ba++; 5017 } 5018 } 5019 /* put together the new matrix */ 5020 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5021 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5022 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5023 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5024 mat->free_a = PETSC_TRUE; 5025 mat->free_ij = PETSC_TRUE; 5026 mat->nonew = 0; 5027 } else if (scall == MAT_REUSE_MATRIX){ 5028 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5029 ci = mat->i; cj = mat->j; cam = mat->a; 5030 for (i=0; i<am; i++) { 5031 /* off-diagonal portion of A */ 5032 ncols_o = bi[i+1] - bi[i]; 5033 for (jo=0; jo<ncols_o; jo++) { 5034 col = cmap[*bj]; 5035 if (col >= cstart) break; 5036 *cam++ = *ba++; bj++; 5037 } 5038 /* diagonal portion of A */ 5039 ncols_d = ai[i+1] - ai[i]; 5040 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5041 /* off-diagonal portion of A */ 5042 for (j=jo; j<ncols_o; j++) { 5043 *cam++ = *ba++; bj++; 5044 } 5045 } 5046 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5047 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5048 PetscFunctionReturn(0); 5049 } 5050 5051 #undef __FUNCT__ 5052 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5053 /*@C 5054 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5055 5056 Not Collective 5057 5058 Input Parameters: 5059 + A - the matrix 5060 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5061 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5062 5063 Output Parameter: 5064 . 
A_loc - the local sequential matrix generated
5065
5066 Level: developer
5067
5068 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5069
5070 @*/
5071 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5072 {
5073 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5074 PetscErrorCode ierr;
5075 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5076 IS isrowa,iscola;
5077 Mat *aloc;
5078 PetscBool match;
5079
5080 PetscFunctionBegin;
5081 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5082 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5083 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5084 if (!row){
5085 start = A->rmap->rstart; end = A->rmap->rend;
5086 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5087 } else {
5088 isrowa = *row;
5089 }
5090 if (!col){
5091 start = A->cmap->rstart;
5092 cmap = a->garray;
5093 nzA = a->A->cmap->n;
5094 nzB = a->B->cmap->n;
5095 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5096 ncols = 0;
5097 for (i=0; i<nzB; i++) {
5098 if (cmap[i] < start) idx[ncols++] = cmap[i];
5099 else break;
5100 }
5101 imark = i;
5102 for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5103 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5104 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5105 } else {
5106 iscola = *col;
5107 }
5108 if (scall != MAT_INITIAL_MATRIX){
5109 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5110 aloc[0] = *A_loc;
5111 }
5112 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5113 *A_loc = aloc[0];
5114 ierr = PetscFree(aloc);CHKERRQ(ierr);
5115 if (!row){
5116 ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5117 }
5118 if (!col){
5119 ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5120 }
5121 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5122 PetscFunctionReturn(0);
5123 }
5124
5125 #undef __FUNCT__
5126 #define __FUNCT__ "MatGetBrowsOfAcols"
5127 /*@C
5128 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5129
5130 Collective on Mat
5131
5132 Input Parameters:
5133 + A,B - the matrices in mpiaij format
5134 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5135 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5136 5137 Output Parameter: 5138 + rowb, colb - index sets of rows and columns of B to extract 5139 - B_seq - the sequential matrix generated 5140 5141 Level: developer 5142 5143 @*/ 5144 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5145 { 5146 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5147 PetscErrorCode ierr; 5148 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5149 IS isrowb,iscolb; 5150 Mat *bseq=PETSC_NULL; 5151 5152 PetscFunctionBegin; 5153 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5154 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5155 } 5156 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5157 5158 if (scall == MAT_INITIAL_MATRIX){ 5159 start = A->cmap->rstart; 5160 cmap = a->garray; 5161 nzA = a->A->cmap->n; 5162 nzB = a->B->cmap->n; 5163 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5164 ncols = 0; 5165 for (i=0; i<nzB; i++) { /* row < local row index */ 5166 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5167 else break; 5168 } 5169 imark = i; 5170 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5171 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5172 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5173 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5174 } else { 5175 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5176 isrowb = *rowb; iscolb = *colb; 5177 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5178 bseq[0] = *B_seq; 5179 } 5180 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5181 *B_seq = bseq[0]; 5182 ierr = PetscFree(bseq);CHKERRQ(ierr); 5183 if (!rowb){ 5184 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5185 } else { 5186 *rowb = isrowb; 5187 } 5188 if (!colb){ 5189 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5190 } else { 5191 *colb = iscolb; 5192 } 5193 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5194 PetscFunctionReturn(0); 5195 } 5196 5197 #undef __FUNCT__ 5198 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5199 /* 5200 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5201 of the OFF-DIAGONAL portion of local A 5202 5203 Collective on Mat 5204 5205 Input Parameters: 5206 + A,B - the matrices in mpiaij format 5207 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5208 5209 Output Parameter: 5210 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5211 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5212 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5213 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5214 5215 Level: developer 5216 5217 */ 5218 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5219 { 5220 VecScatter_MPI_General *gen_to,*gen_from; 5221 PetscErrorCode ierr; 5222 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5223 Mat_SeqAIJ *b_oth; 5224 VecScatter ctx=a->Mvctx; 5225 MPI_Comm comm=((PetscObject)ctx)->comm; 5226 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5227 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5228 PetscScalar *rvalues,*svalues; 5229 MatScalar *b_otha,*bufa,*bufA; 5230 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5231 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5232 MPI_Status *sstatus,rstatus; 5233 PetscMPIInt jj; 5234 PetscInt *cols,sbs,rbs; 5235 PetscScalar *vals; 5236 5237 PetscFunctionBegin; 5238 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5239 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5240 } 5241 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5242 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5243 5244 gen_to = (VecScatter_MPI_General*)ctx->todata; 5245 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5246 rvalues = gen_from->values; /* holds the length of receiving row */ 5247 svalues = gen_to->values; /* holds the length of sending row */ 5248 nrecvs = gen_from->n; 5249 nsends = gen_to->n; 5250 5251 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5252 srow = gen_to->indices; /* local row index to be sent */ 5253 sstarts = gen_to->starts; 5254 sprocs = gen_to->procs; 5255 sstatus = gen_to->sstatus; 5256 sbs = gen_to->bs; 5257 rstarts = gen_from->starts; 5258 rprocs = gen_from->procs; 5259 rbs = gen_from->bs; 5260 5261 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5262 if (scall == MAT_INITIAL_MATRIX){ 5263 /* i-array */ 5264 /*---------*/ 5265 /* post receives */ 5266 for (i=0; i<nrecvs; i++){ 5267 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5268 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5269 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5270 } 5271 5272 /* pack the outgoing message */ 5273 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5274 sstartsj[0] = 0; rstartsj[0] = 0; 5275 len = 0; /* total length of j or a array to be sent */ 5276 k = 0; 5277 for (i=0; i<nsends; i++){ 5278 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5279 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5280 for (j=0; j<nrows; j++) { 5281 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5282 for (l=0; l<sbs; l++){ 5283 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5284 rowlen[j*sbs+l] = ncols; 5285 len += ncols; 5286 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5287 } 5288 k++; 5289 } 5290 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5291 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5292 } 5293 /* 
recvs and sends of i-array are completed */
5294 i = nrecvs;
5295 while (i--) {
5296 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5297 }
5298 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5299
5300 /* allocate buffers for sending j and a arrays */
5301 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5302 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5303
5304 /* create i-array of B_oth */
5305 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5306 b_othi[0] = 0;
5307 len = 0; /* total length of j or a array to be received */
5308 k = 0;
5309 for (i=0; i<nrecvs; i++){
5310 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5311 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5312 for (j=0; j<nrows; j++) {
5313 b_othi[k+1] = b_othi[k] + rowlen[j];
5314 len += rowlen[j]; k++;
5315 }
5316 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5317 }
5318
5319 /* allocate space for j and a arrays of B_oth */
5320 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5321 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5322
5323 /* j-array */
5324 /*---------*/
5325 /* post receives of j-array */
5326 for (i=0; i<nrecvs; i++){
5327 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5328 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5329 }
5330
5331 /* pack the outgoing message j-array */
5332 k = 0;
5333 for (i=0; i<nsends; i++){
5334 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5335 bufJ = bufj+sstartsj[i];
5336 for (j=0; j<nrows; j++) {
5337 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5338 for (ll=0; ll<sbs; ll++){
5339 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5340 for (l=0; l<ncols; l++){
5341 *bufJ++ = cols[l];
5342 }
5343 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5344 }
5345 }
5346 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5347 }
5348
5349 /* recvs and sends of j-array are completed */
5350 i = nrecvs;
5351 while (i--) {
5352 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5353 }
5354 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5355 } else if (scall == MAT_REUSE_MATRIX){
5356 sstartsj = *startsj_s;
5357 rstartsj = *startsj_r;
5358 bufa = *bufa_ptr;
5359 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5360 b_otha = b_oth->a;
5361 } else {
5362 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5363 }
5364
5365 /* a-array */
5366 /*---------*/
5367 /* post receives of a-array */
5368 for (i=0; i<nrecvs; i++){
5369 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5370 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5371 }
5372
5373 /* pack the outgoing message a-array */
5374 k = 0;
5375 for (i=0; i<nsends; i++){
5376 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5377 bufA = bufa+sstartsj[i];
5378 for (j=0; j<nrows; j++) {
5379 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5380 for (ll=0; ll<sbs; ll++){
5381 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5382 for (l=0; l<ncols; l++){
5383 *bufA++ = vals[l];
5384 }
5385 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5386 } 5387 } 5388 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5389 } 5390 /* recvs and sends of a-array are completed */ 5391 i = nrecvs; 5392 while (i--) { 5393 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5394 } 5395 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5396 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5397 5398 if (scall == MAT_INITIAL_MATRIX){ 5399 /* put together the new matrix */ 5400 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5401 5402 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5403 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5404 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5405 b_oth->free_a = PETSC_TRUE; 5406 b_oth->free_ij = PETSC_TRUE; 5407 b_oth->nonew = 0; 5408 5409 ierr = PetscFree(bufj);CHKERRQ(ierr); 5410 if (!startsj_s || !bufa_ptr){ 5411 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5412 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5413 } else { 5414 *startsj_s = sstartsj; 5415 *startsj_r = rstartsj; 5416 *bufa_ptr = bufa; 5417 } 5418 } 5419 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5420 PetscFunctionReturn(0); 5421 } 5422 5423 #undef __FUNCT__ 5424 #define __FUNCT__ "MatGetCommunicationStructs" 5425 /*@C 5426 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5427 5428 Not Collective 5429 5430 Input Parameters: 5431 . A - The matrix in mpiaij format 5432 5433 Output Parameter: 5434 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5435 . 
colmap - A map from global column index to local index into lvec 5436 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5437 5438 Level: developer 5439 5440 @*/ 5441 #if defined (PETSC_USE_CTABLE) 5442 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5443 #else 5444 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5445 #endif 5446 { 5447 Mat_MPIAIJ *a; 5448 5449 PetscFunctionBegin; 5450 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5451 PetscValidPointer(lvec, 2); 5452 PetscValidPointer(colmap, 3); 5453 PetscValidPointer(multScatter, 4); 5454 a = (Mat_MPIAIJ *) A->data; 5455 if (lvec) *lvec = a->lvec; 5456 if (colmap) *colmap = a->colmap; 5457 if (multScatter) *multScatter = a->Mvctx; 5458 PetscFunctionReturn(0); 5459 } 5460 5461 EXTERN_C_BEGIN 5462 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5463 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5464 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5465 EXTERN_C_END 5466 5467 #undef __FUNCT__ 5468 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5469 /* 5470 Computes (B'*A')' since computing B*A directly is untenable 5471 5472 n p p 5473 ( ) ( ) ( ) 5474 m ( A ) * n ( B ) = m ( C ) 5475 ( ) ( ) ( ) 5476 5477 */ 5478 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5479 { 5480 PetscErrorCode ierr; 5481 Mat At,Bt,Ct; 5482 5483 PetscFunctionBegin; 5484 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5485 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5486 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5487 ierr = MatDestroy(&At);CHKERRQ(ierr); 5488 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5489 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5490 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5491 PetscFunctionReturn(0); 5492 } 5493 5494 #undef __FUNCT__ 5495 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5496 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5497 { 5498 PetscErrorCode ierr; 5499 PetscInt m=A->rmap->n,n=B->cmap->n; 5500 Mat Cmat; 5501 5502 PetscFunctionBegin; 5503 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5504 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5505 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5506 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5507 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5508 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5509 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5510 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5511 *C = Cmat; 5512 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5513 PetscFunctionReturn(0); 5514 } 5515 5516 /* ----------------------------------------------------------------*/ 5517 #undef __FUNCT__ 5518 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5519 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5520 { 5521 PetscErrorCode ierr; 5522 5523 PetscFunctionBegin; 5524 if (scall == MAT_INITIAL_MATRIX){ 5525 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5526 } 5527 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
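/* For MAT_REUSE_MATRIX the symbolic step above is skipped and the numeric step refills the existing product C */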
5528 PetscFunctionReturn(0); 5529 } 5530 5531 EXTERN_C_BEGIN 5532 #if defined(PETSC_HAVE_MUMPS) 5533 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5534 #endif 5535 #if defined(PETSC_HAVE_PASTIX) 5536 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5537 #endif 5538 #if defined(PETSC_HAVE_SUPERLU_DIST) 5539 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5540 #endif 5541 #if defined(PETSC_HAVE_SPOOLES) 5542 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5543 #endif 5544 #if defined(PETSC_HAVE_CLIQUE) 5545 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5546 #endif 5547 EXTERN_C_END 5548 5549 /*MC 5550 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5551 5552 Options Database Keys: 5553 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5554 5555 Level: beginner 5556 5557 .seealso: MatCreateAIJ() 5558 M*/ 5559 5560 EXTERN_C_BEGIN 5561 #undef __FUNCT__ 5562 #define __FUNCT__ "MatCreate_MPIAIJ" 5563 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5564 { 5565 Mat_MPIAIJ *b; 5566 PetscErrorCode ierr; 5567 PetscMPIInt size; 5568 5569 PetscFunctionBegin; 5570 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5571 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5572 B->data = (void*)b; 5573 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5574 B->assembled = PETSC_FALSE; 5575 B->insertmode = NOT_SET_VALUES; 5576 b->size = size; 5577 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5578 5579 /* build cache for off array entries formed */ 5580 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5581 b->donotstash = PETSC_FALSE; 5582 b->colmap = 0; 5583 b->garray = 0; 5584 b->roworiented = PETSC_TRUE; 5585 5586 /* stuff used for matrix vector multiply */ 5587 b->lvec = PETSC_NULL; 5588 b->Mvctx = PETSC_NULL; 5589 5590 /* stuff for MatGetRow() */ 5591 b->rowindices = 0; 5592 b->rowvalues = 0; 5593 b->getrowactive = PETSC_FALSE; 5594 5595 /* flexible pointer used in CUSP/CUSPARSE classes */ 5596 b->spptr = PETSC_NULL; 5597 #if defined(PETSC_HAVE_SPOOLES) 5598 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5599 "MatGetFactor_mpiaij_spooles", 5600 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5601 #endif 5602 #if defined(PETSC_HAVE_MUMPS) 5603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5604 "MatGetFactor_aij_mumps", 5605 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5606 #endif 5607 #if defined(PETSC_HAVE_PASTIX) 5608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5609 "MatGetFactor_mpiaij_pastix", 5610 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5611 #endif 5612 #if defined(PETSC_HAVE_SUPERLU_DIST) 5613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5614 "MatGetFactor_mpiaij_superlu_dist", 5615 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5616 #endif 5617 #if defined(PETSC_HAVE_CLIQUE) 5618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5619 "MatGetFactor_aij_clique", 5620 MatGetFactor_aij_clique);CHKERRQ(ierr); 5621 #endif 5622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5623 "MatStoreValues_MPIAIJ", 5624 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5625 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5626 "MatRetrieveValues_MPIAIJ", 5627 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5629 "MatGetDiagonalBlock_MPIAIJ", 5630 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5632 "MatIsTranspose_MPIAIJ", 5633 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5635 "MatMPIAIJSetPreallocation_MPIAIJ", 5636 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5637 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5638 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5639 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5640 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5641 "MatDiagonalScaleLocal_MPIAIJ", 5642 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5644 "MatConvert_MPIAIJ_MPIAIJPERM", 5645 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5646 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5647 "MatConvert_MPIAIJ_MPIAIJCRL", 5648 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5649 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5650 "MatConvert_MPIAIJ_MPISBAIJ", 5651 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5652 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5653 "MatMatMult_MPIDense_MPIAIJ", 5654 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5655 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5656 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5657 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5658 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5659 "MatMatMultNumeric_MPIDense_MPIAIJ", 5660 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5661 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5662 PetscFunctionReturn(0); 5663 } 5664 EXTERN_C_END 5665 5666 #undef __FUNCT__ 5667 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5668 /*@ 5669 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5670 and "off-diagonal" part of the matrix in CSR format. 5671 5672 Collective on MPI_Comm 5673 5674 Input Parameters: 5675 + comm - MPI communicator 5676 . m - number of local rows (Cannot be PETSC_DECIDE) 5677 . n - This value should be the same as the local size used in creating the 5678 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5679 calculated if N is given) For square matrices n is almost always m. 5680 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5681 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5682 . i - row indices for "diagonal" portion of matrix 5683 . j - column indices 5684 . a - matrix values 5685 . oi - row indices for "off-diagonal" portion of matrix 5686 . oj - column indices 5687 - oa - matrix values 5688 5689 Output Parameter: 5690 . mat - the matrix 5691 5692 Level: advanced 5693 5694 Notes: 5695 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 
The user 5696 must free the arrays once the matrix has been destroyed and not before. 5697 5698 The i and j indices are 0 based 5699 5700 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5701 5702 This sets local rows and cannot be used to set off-processor values. 5703 5704 You cannot later use MatSetValues() to change values in this matrix. 5705 5706 .keywords: matrix, aij, compressed row, sparse, parallel 5707 5708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5709 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5710 @*/ 5711 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5712 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5713 { 5714 PetscErrorCode ierr; 5715 Mat_MPIAIJ *maij; 5716 5717 PetscFunctionBegin; 5718 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5719 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5720 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5721 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5722 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5723 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5724 maij = (Mat_MPIAIJ*) (*mat)->data; 5725 maij->donotstash = PETSC_TRUE; 5726 (*mat)->preallocated = PETSC_TRUE; 5727 5728 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5729 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5730 5731 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5732 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5733 5734 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5735 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5736 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5737 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5738 5739 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5740 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5741 PetscFunctionReturn(0); 5742 } 5743 5744 /* 5745 Special version for direct calls from Fortran 5746 */ 5747 #include <petsc-private/fortranimpl.h> 5748 5749 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5750 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5751 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5752 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5753 #endif 5754 5755 /* Change these macros so can be used in void function */ 5756 #undef CHKERRQ 5757 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5758 #undef SETERRQ2 5759 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5760 #undef SETERRQ3 5761 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5762 #undef SETERRQ 5763 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5764 5765 EXTERN_C_BEGIN 5766 #undef __FUNCT__ 5767 #define __FUNCT__ "matsetvaluesmpiaij_" 5768 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5769 { 5770 Mat mat = *mmat; 5771 PetscInt m = *mm, n = *mn; 5772 InsertMode addv = *maddv; 5773 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5774 PetscScalar value; 5775 PetscErrorCode ierr; 5776 5777 
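/* The body below mirrors MatSetValues_MPIAIJ(); it is repeated here so that direct calls from Fortran avoid the usual MatSetValues() dispatch */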
MatCheckPreallocated(mat,1); 5778 if (mat->insertmode == NOT_SET_VALUES) { 5779 mat->insertmode = addv; 5780 } 5781 #if defined(PETSC_USE_DEBUG) 5782 else if (mat->insertmode != addv) { 5783 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5784 } 5785 #endif 5786 { 5787 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5788 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5789 PetscBool roworiented = aij->roworiented; 5790 5791 /* Some Variables required in the macro */ 5792 Mat A = aij->A; 5793 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5794 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5795 MatScalar *aa = a->a; 5796 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5797 Mat B = aij->B; 5798 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5799 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5800 MatScalar *ba = b->a; 5801 5802 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5803 PetscInt nonew = a->nonew; 5804 MatScalar *ap1,*ap2; 5805 5806 PetscFunctionBegin; 5807 for (i=0; i<m; i++) { 5808 if (im[i] < 0) continue; 5809 #if defined(PETSC_USE_DEBUG) 5810 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5811 #endif 5812 if (im[i] >= rstart && im[i] < rend) { 5813 row = im[i] - rstart; 5814 lastcol1 = -1; 5815 rp1 = aj + ai[row]; 5816 ap1 = aa + ai[row]; 5817 rmax1 = aimax[row]; 5818 nrow1 = ailen[row]; 5819 low1 = 0; 5820 high1 = nrow1; 5821 lastcol2 = -1; 5822 rp2 = bj + bi[row]; 5823 ap2 = ba + bi[row]; 5824 rmax2 = bimax[row]; 5825 nrow2 = bilen[row]; 5826 low2 = 0; 5827 high2 = nrow2; 5828 5829 for (j=0; j<n; j++) { 5830 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5831 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5832 if (in[j] >= cstart && in[j] < cend){ 5833 col = in[j] - cstart; 5834 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5835 } else if (in[j] < 0) continue; 5836 #if defined(PETSC_USE_DEBUG) 5837 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5838 #endif 5839 else { 5840 if (mat->was_assembled) { 5841 if (!aij->colmap) { 5842 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5843 } 5844 #if defined (PETSC_USE_CTABLE) 5845 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5846 col--; 5847 #else 5848 col = aij->colmap[in[j]] - 1; 5849 #endif 5850 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5851 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5852 col = in[j]; 5853 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5854 B = aij->B; 5855 b = (Mat_SeqAIJ*)B->data; 5856 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5857 rp2 = bj + bi[row]; 5858 ap2 = ba + bi[row]; 5859 rmax2 = bimax[row]; 5860 nrow2 = bilen[row]; 5861 low2 = 0; 5862 high2 = nrow2; 5863 bm = aij->B->rmap->n; 5864 ba = b->a; 5865 } 5866 } else col = in[j]; 5867 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5868 } 5869 } 5870 } else { 5871 if (!aij->donotstash) { 5872 if (roworiented) { 5873 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5874 } else { 5875 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5876 } 5877 } 5878 } 5879 }} 5880 PetscFunctionReturnVoid(); 5881 } 5882 EXTERN_C_END 5883 5884
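/*
   Example usage (a minimal sketch, not taken from the PETSc documentation above):
   building a parallel MPIAIJ matrix from per-process sequential contributions with
   MatCreateMPIAIJSumSeqAIJ().  The names seqmat, mpimat, M and N and the preallocation
   value 5 are illustrative only; each process supplies a SeqAIJ matrix of the same
   global size M x N and the contributions are summed entry by entry.

     Mat            seqmat,mpimat;
     PetscInt       M = 100,N = 100;
     PetscErrorCode ierr;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,5,PETSC_NULL,&seqmat);CHKERRQ(ierr);
     ... each process inserts its contribution with MatSetValues() ...
     ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);

     ... if the values of seqmat change but its nonzero pattern does not, the
     parallel matrix can be refilled without repeating the symbolic work ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);

     ierr = MatDestroy(&mpimat);CHKERRQ(ierr);

   See the Notes of MatCreateMPIAIJSumSeqAIJ() above regarding the lifetime of seqmat.
*/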