#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
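  /*
     For an assembled square matrix whose row and column ownership ranges coincide, every locally
     owned diagonal entry lies in the sequential diagonal block aij->A, so the operation below is
     forwarded to that block; otherwise the generic MatDiagonalSet_Default() path is used.  A
     minimal usage sketch (assuming D is a vector with the same row layout as Y):

        ierr = MatDiagonalSet(Y,D,ADD_VALUES);CHKERRQ(ierr);      adds the entries of D to the diagonal of Y
        ierr = MatDiagonalSet(Y,D,INSERT_VALUES);CHKERRQ(ierr);   replaces the diagonal of Y with D
  */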
Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 113 114 PetscFunctionBegin; 115 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 116 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 117 } else { 118 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 119 } 120 PetscFunctionReturn(0); 121 } 122 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 127 { 128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 129 PetscErrorCode ierr; 130 PetscInt i,rstart,nrows,*rows; 131 132 PetscFunctionBegin; 133 *zrows = NULL; 134 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 135 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 136 for (i=0; i<nrows; i++) rows[i] += rstart; 137 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 144 { 145 PetscErrorCode ierr; 146 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 147 PetscInt i,n,*garray = aij->garray; 148 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 149 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 150 PetscReal *work; 151 152 PetscFunctionBegin; 153 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 154 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 155 if (type == NORM_2) { 156 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 157 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 158 } 159 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 160 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 161 } 162 } else if (type == NORM_1) { 163 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 164 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 165 } 166 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 167 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 168 } 169 } else if (type == NORM_INFINITY) { 170 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 171 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 172 } 173 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 174 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 175 } 176 177 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 178 if (type == NORM_INFINITY) { 179 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 180 } else { 181 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 182 } 183 ierr = PetscFree(work);CHKERRQ(ierr); 184 if (type == NORM_2) { 185 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 186 } 187 PetscFunctionReturn(0); 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatDistribute_MPIAIJ" 192 /* 193 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 194 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
195 196 Only for square matrices 197 198 Used by a preconditioner, hence PETSC_EXTERN 199 */ 200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 201 { 202 PetscMPIInt rank,size; 203 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 204 PetscErrorCode ierr; 205 Mat mat; 206 Mat_SeqAIJ *gmata; 207 PetscMPIInt tag; 208 MPI_Status status; 209 PetscBool aij; 210 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 211 212 PetscFunctionBegin; 213 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 214 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 215 if (!rank) { 216 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 217 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 218 } 219 if (reuse == MAT_INITIAL_MATRIX) { 220 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 221 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 222 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 223 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 224 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 225 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 226 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 227 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 228 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 229 230 rowners[0] = 0; 231 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 232 rstart = rowners[rank]; 233 rend = rowners[rank+1]; 234 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 235 if (!rank) { 236 gmata = (Mat_SeqAIJ*) gmat->data; 237 /* send row lengths to all processors */ 238 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 239 for (i=1; i<size; i++) { 240 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 /* determine number diagonal and off-diagonal counts */ 243 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 244 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 245 jj = 0; 246 for (i=0; i<m; i++) { 247 for (j=0; j<dlens[i]; j++) { 248 if (gmata->j[jj] < rstart) ld[i]++; 249 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 250 jj++; 251 } 252 } 253 /* send column indices to other processes */ 254 for (i=1; i<size; i++) { 255 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 256 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 257 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 258 } 259 260 /* send numerical values to other processes */ 261 for (i=1; i<size; i++) { 262 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 263 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 264 } 265 gmataa = gmata->a; 266 gmataj = gmata->j; 267 268 } else { 269 /* receive row lengths */ 270 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 271 /* receive column indices */ 272 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 273 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 274 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 275 /* determine number diagonal and off-diagonal counts */ 276 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 277 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 278 jj = 0; 279 for (i=0; i<m; i++) { 280 for (j=0; j<dlens[i]; j++) { 281 if 
(gmataj[jj] < rstart) ld[i]++; 282 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 283 jj++; 284 } 285 } 286 /* receive numerical values */ 287 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 288 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 289 } 290 /* set preallocation */ 291 for (i=0; i<m; i++) { 292 dlens[i] -= olens[i]; 293 } 294 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 295 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 296 297 for (i=0; i<m; i++) { 298 dlens[i] += olens[i]; 299 } 300 cnt = 0; 301 for (i=0; i<m; i++) { 302 row = rstart + i; 303 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 304 cnt += dlens[i]; 305 } 306 if (rank) { 307 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 308 } 309 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 310 ierr = PetscFree(rowners);CHKERRQ(ierr); 311 312 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 313 314 *inmat = mat; 315 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 316 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 317 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 318 mat = *inmat; 319 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 320 if (!rank) { 321 /* send numerical values to other processes */ 322 gmata = (Mat_SeqAIJ*) gmat->data; 323 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 324 gmataa = gmata->a; 325 for (i=1; i<size; i++) { 326 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 327 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 328 } 329 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 330 } else { 331 /* receive numerical values from process 0*/ 332 nz = Ad->nz + Ao->nz; 333 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 334 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 335 } 336 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 337 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 338 ad = Ad->a; 339 ao = Ao->a; 340 if (mat->rmap->n) { 341 i = 0; 342 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 343 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 344 } 345 for (i=1; i<mat->rmap->n; i++) { 346 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 347 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 348 } 349 i--; 350 if (mat->rmap->n) { 351 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 352 } 353 if (rank) { 354 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 355 } 356 } 357 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 358 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 362 /* 363 Local utility routine that creates a mapping from the global column 364 number to the local number in the off-diagonal part of the local 365 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 366 a slightly higher hash table cost; without it it is not scalable (each processor 367 has an order N integer array but is fast to acess. 
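     As a sketch of the lookup used later in this file (gcol is a placeholder global column
     number and lcol the corresponding local column of the off-diagonal block):

     #if defined(PETSC_USE_CTABLE)
       PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

     A result of -1 means that global column gcol does not occur in the off-diagonal block
     on this process.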
368 */ 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 372 { 373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 374 PetscErrorCode ierr; 375 PetscInt n = aij->B->cmap->n,i; 376 377 PetscFunctionBegin; 378 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 379 #if defined(PETSC_USE_CTABLE) 380 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 381 for (i=0; i<n; i++) { 382 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 383 } 384 #else 385 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 386 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 387 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 388 #endif 389 PetscFunctionReturn(0); 390 } 391 392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 393 { \ 394 if (col <= lastcol1) low1 = 0; \ 395 else high1 = nrow1; \ 396 lastcol1 = col;\ 397 while (high1-low1 > 5) { \ 398 t = (low1+high1)/2; \ 399 if (rp1[t] > col) high1 = t; \ 400 else low1 = t; \ 401 } \ 402 for (_i=low1; _i<high1; _i++) { \ 403 if (rp1[_i] > col) break; \ 404 if (rp1[_i] == col) { \ 405 if (addv == ADD_VALUES) ap1[_i] += value; \ 406 else ap1[_i] = value; \ 407 goto a_noinsert; \ 408 } \ 409 } \ 410 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 411 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 412 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 413 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 414 N = nrow1++ - 1; a->nz++; high1++; \ 415 /* shift up all the later entries in this row */ \ 416 for (ii=N; ii>=_i; ii--) { \ 417 rp1[ii+1] = rp1[ii]; \ 418 ap1[ii+1] = ap1[ii]; \ 419 } \ 420 rp1[_i] = col; \ 421 ap1[_i] = value; \ 422 A->nonzerostate++;\ 423 a_noinsert: ; \ 424 ailen[row] = nrow1; \ 425 } 426 427 428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 429 { \ 430 if (col <= lastcol2) low2 = 0; \ 431 else high2 = nrow2; \ 432 lastcol2 = col; \ 433 while (high2-low2 > 5) { \ 434 t = (low2+high2)/2; \ 435 if (rp2[t] > col) high2 = t; \ 436 else low2 = t; \ 437 } \ 438 for (_i=low2; _i<high2; _i++) { \ 439 if (rp2[_i] > col) break; \ 440 if (rp2[_i] == col) { \ 441 if (addv == ADD_VALUES) ap2[_i] += value; \ 442 else ap2[_i] = value; \ 443 goto b_noinsert; \ 444 } \ 445 } \ 446 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 447 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 448 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 449 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 450 N = nrow2++ - 1; b->nz++; high2++; \ 451 /* shift up all the later entries in this row */ \ 452 for (ii=N; ii>=_i; ii--) { \ 453 rp2[ii+1] = rp2[ii]; \ 454 ap2[ii+1] = ap2[ii]; \ 455 } \ 456 rp2[_i] = col; \ 457 ap2[_i] = value; \ 458 B->nonzerostate++; \ 459 b_noinsert: ; \ 460 bilen[row] = nrow2; \ 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 466 { 467 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
(Mat_SeqAIJ*)mat->B->data; 469 PetscErrorCode ierr; 470 PetscInt l,*garray = mat->garray,diag; 471 472 PetscFunctionBegin; 473 /* code only works for square matrices A */ 474 475 /* find size of row to the left of the diagonal part */ 476 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 477 row = row - diag; 478 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 479 if (garray[b->j[b->i[row]+l]] > diag) break; 480 } 481 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 482 483 /* diagonal part */ 484 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 485 486 /* right of diagonal part */ 487 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatSetValues_MPIAIJ" 493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 494 { 495 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 496 PetscScalar value; 497 PetscErrorCode ierr; 498 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 499 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 500 PetscBool roworiented = aij->roworiented; 501 502 /* Some Variables required in the macro */ 503 Mat A = aij->A; 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 505 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 506 MatScalar *aa = a->a; 507 PetscBool ignorezeroentries = a->ignorezeroentries; 508 Mat B = aij->B; 509 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 510 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 511 MatScalar *ba = b->a; 512 513 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 514 PetscInt nonew; 515 MatScalar *ap1,*ap2; 516 517 PetscFunctionBegin; 518 for (i=0; i<m; i++) { 519 if (im[i] < 0) continue; 520 #if defined(PETSC_USE_DEBUG) 521 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 522 #endif 523 if (im[i] >= rstart && im[i] < rend) { 524 row = im[i] - rstart; 525 lastcol1 = -1; 526 rp1 = aj + ai[row]; 527 ap1 = aa + ai[row]; 528 rmax1 = aimax[row]; 529 nrow1 = ailen[row]; 530 low1 = 0; 531 high1 = nrow1; 532 lastcol2 = -1; 533 rp2 = bj + bi[row]; 534 ap2 = ba + bi[row]; 535 rmax2 = bimax[row]; 536 nrow2 = bilen[row]; 537 low2 = 0; 538 high2 = nrow2; 539 540 for (j=0; j<n; j++) { 541 if (roworiented) value = v[i*n+j]; 542 else value = v[i+j*m]; 543 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 544 if (in[j] >= cstart && in[j] < cend) { 545 col = in[j] - cstart; 546 nonew = a->nonew; 547 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 548 } else if (in[j] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 551 #endif 552 else { 553 if (mat->was_assembled) { 554 if (!aij->colmap) { 555 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 556 } 557 #if defined(PETSC_USE_CTABLE) 558 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 559 col--; 560 #else 561 col = aij->colmap[in[j]] - 1; 562 #endif 563 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 564 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 565 col = in[j]; 566 /* 
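           MatDisAssemble_MPIAIJ() above replaced aij->B with a matrix that is addressed by
           global column numbers, so the cached pointers into B are stale.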
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 567 B = aij->B; 568 b = (Mat_SeqAIJ*)B->data; 569 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 570 rp2 = bj + bi[row]; 571 ap2 = ba + bi[row]; 572 rmax2 = bimax[row]; 573 nrow2 = bilen[row]; 574 low2 = 0; 575 high2 = nrow2; 576 bm = aij->B->rmap->n; 577 ba = b->a; 578 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 579 } else col = in[j]; 580 nonew = b->nonew; 581 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 582 } 583 } 584 } else { 585 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 586 if (!aij->donotstash) { 587 mat->assembled = PETSC_FALSE; 588 if (roworiented) { 589 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 590 } else { 591 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 592 } 593 } 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatGetValues_MPIAIJ" 601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 602 { 603 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 604 PetscErrorCode ierr; 605 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 606 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 607 608 PetscFunctionBegin; 609 for (i=0; i<m; i++) { 610 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 611 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 612 if (idxm[i] >= rstart && idxm[i] < rend) { 613 row = idxm[i] - rstart; 614 for (j=0; j<n; j++) { 615 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 616 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 617 if (idxn[j] >= cstart && idxn[j] < cend) { 618 col = idxn[j] - cstart; 619 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 620 } else { 621 if (!aij->colmap) { 622 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 623 } 624 #if defined(PETSC_USE_CTABLE) 625 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 626 col--; 627 #else 628 col = aij->colmap[idxn[j]] - 1; 629 #endif 630 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 631 else { 632 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 633 } 634 } 635 } 636 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 637 } 638 PetscFunctionReturn(0); 639 } 640 641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 646 { 647 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 648 PetscErrorCode ierr; 649 PetscInt nstash,reallocs; 650 InsertMode addv; 651 652 PetscFunctionBegin; 653 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 654 655 /* make sure all processors are either in INSERTMODE or 
ADDMODE */ 656 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 657 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 658 mat->insertmode = addv; /* in case this processor had no cache */ 659 660 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 671 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 672 PetscErrorCode ierr; 673 PetscMPIInt n; 674 PetscInt i,j,rstart,ncols,flg; 675 PetscInt *row,*col; 676 PetscBool other_disassembled; 677 PetscScalar *val; 678 InsertMode addv = mat->insertmode; 679 680 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 681 682 PetscFunctionBegin; 683 if (!aij->donotstash && !mat->nooffprocentries) { 684 while (1) { 685 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 686 if (!flg) break; 687 688 for (i=0; i<n; ) { 689 /* Now identify the consecutive vals belonging to the same row */ 690 for (j=i,rstart=row[j]; j<n; j++) { 691 if (row[j] != rstart) break; 692 } 693 if (j < n) ncols = j-i; 694 else ncols = n-i; 695 /* Now assemble all these values with a single function call */ 696 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 697 698 i = j; 699 } 700 } 701 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 702 } 703 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 704 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 705 706 /* determine if any processor has disassembled, if so we must 707 also disassemble ourselfs, in order that we may reassemble. 
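        The allreduce below forms the MPI_PROD of was_assembled across all ranks; the product is
        true only if no rank has disassembled, so a still-assembled rank disassembles whenever any
        other rank already has.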
*/ 708 /* 709 if nonzero structure of submatrix B cannot change then we know that 710 no processor disassembled thus we can skip this stuff 711 */ 712 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 713 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 714 if (mat->was_assembled && !other_disassembled) { 715 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 716 } 717 } 718 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 719 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 720 } 721 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 722 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 723 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 724 725 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 726 727 aij->rowvalues = 0; 728 729 /* used by MatAXPY() */ 730 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 731 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 732 733 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 734 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 735 736 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 737 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 738 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 739 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 740 } 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 747 { 748 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 753 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatZeroRows_MPIAIJ" 759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 760 { 761 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 762 PetscInt *owners = A->rmap->range; 763 PetscInt n = A->rmap->n; 764 PetscSF sf; 765 PetscInt *lrows; 766 PetscSFNode *rrows; 767 PetscInt r, p = 0, len = 0; 768 PetscErrorCode ierr; 769 770 PetscFunctionBegin; 771 /* Create SF where leaves are input rows and roots are owned rows */ 772 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 773 for (r = 0; r < n; ++r) lrows[r] = -1; 774 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 775 for (r = 0; r < N; ++r) { 776 const PetscInt idx = rows[r]; 777 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 778 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 779 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 780 } 781 if (A->nooffproczerorows) { 782 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 783 lrows[len++] = idx - owners[p]; 784 } else { 785 rrows[r].rank = p; 786 rrows[r].index = rows[r] - owners[p]; 787 } 788 } 789 if (!A->nooffproczerorows) { 790 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 791 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, 
PETSC_OWN_POINTER);CHKERRQ(ierr); 792 /* Collect flags for rows to be zeroed */ 793 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 794 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 795 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 796 /* Compress and put in row numbers */ 797 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 798 } 799 /* fix right hand side if needed */ 800 if (x && b) { 801 const PetscScalar *xx; 802 PetscScalar *bb; 803 804 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 805 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 806 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 807 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 808 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 809 } 810 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 811 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 812 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 813 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 814 } else if (diag != 0.0) { 815 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 817 for (r = 0; r < len; ++r) { 818 const PetscInt row = lrows[r] + A->rmap->rstart; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 } 826 ierr = PetscFree(lrows);CHKERRQ(ierr); 827 828 /* only change matrix nonzero state if pattern was allowed to be changed */ 829 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 830 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 831 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 832 } 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,p = 0,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscSFNode *rrows; 846 PetscSF sf; 847 const PetscScalar *xx; 848 PetscScalar *bb,*mask; 849 Vec xmask,lmask; 850 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 851 const PetscInt *aj, *ii,*ridx; 852 PetscScalar *aa; 853 854 PetscFunctionBegin; 855 /* Create SF where leaves are input rows and roots are owned rows */ 856 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 857 for (r = 0; r < n; ++r) lrows[r] = -1; 858 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 859 for (r = 0; r < N; ++r) { 860 const PetscInt idx = rows[r]; 861 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 862 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 863 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 864 } 865 rrows[r].rank = p; 866 rrows[r].index = rows[r] - 
owners[p]; 867 } 868 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 869 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 870 /* Collect flags for rows to be zeroed */ 871 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 872 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 874 /* Compress and put in row numbers */ 875 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 876 /* zero diagonal part of matrix */ 877 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 878 /* handle off diagonal part of matrix */ 879 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 880 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 881 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 882 for (i=0; i<len; i++) bb[lrows[i]] = 1; 883 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 884 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 887 if (x) { 888 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 891 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 892 } 893 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 894 /* remove zeroed rows of off diagonal matrix */ 895 ii = aij->i; 896 for (i=0; i<len; i++) { 897 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 898 } 899 /* loop over all elements of off process part of matrix zeroing removed columns*/ 900 if (aij->compressedrow.use) { 901 m = aij->compressedrow.nrows; 902 ii = aij->compressedrow.i; 903 ridx = aij->compressedrow.rindex; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 909 for (j=0; j<n; j++) { 910 if (PetscAbsScalar(mask[*aj])) { 911 if (b) bb[*ridx] -= *aa*xx[*aj]; 912 *aa = 0.0; 913 } 914 aa++; 915 aj++; 916 } 917 ridx++; 918 } 919 } else { /* do not use compressed row format */ 920 m = l->B->rmap->n; 921 for (i=0; i<m; i++) { 922 n = ii[i+1] - ii[i]; 923 aj = aij->j + ii[i]; 924 aa = aij->a + ii[i]; 925 for (j=0; j<n; j++) { 926 if (PetscAbsScalar(mask[*aj])) { 927 if (b) bb[i] -= *aa*xx[*aj]; 928 *aa = 0.0; 929 } 930 aa++; 931 aj++; 932 } 933 } 934 } 935 if (x) { 936 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 937 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 938 } 939 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 940 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 941 ierr = PetscFree(lrows);CHKERRQ(ierr); 942 943 /* only change matrix nonzero state if pattern was allowed to be changed */ 944 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 945 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 946 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatMult_MPIAIJ" 953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 954 { 955 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 956 PetscErrorCode ierr; 957 PetscInt nt; 958 959 PetscFunctionBegin; 960 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 961 if (nt != 
A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 962 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 964 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 972 { 973 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatMultAdd_MPIAIJ" 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 991 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 999 { 1000 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 PetscBool merged; 1003 1004 PetscFunctionBegin; 1005 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1006 /* do nondiagonal part */ 1007 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1008 if (!merged) { 1009 /* send it on its way */ 1010 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1011 /* do local part */ 1012 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1013 /* receive remote parts: note this assumes the values are not actually */ 1014 /* added in yy until the next line, */ 1015 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 } else { 1017 /* do local part */ 1018 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1019 /* send it on its way */ 1020 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1021 /* values actually were received in the Begin() but we need to call this nop */ 1022 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1029 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1030 { 1031 MPI_Comm comm; 1032 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1033 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1034 IS Me,Notme; 1035 PetscErrorCode ierr; 1036 PetscInt M,N,first,last,*notme,i; 1037 PetscMPIInt size; 1038 1039 PetscFunctionBegin; 1040 /* Easy test: symmetric diagonal block */ 1041 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1042 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1043 if (!*f) PetscFunctionReturn(0); 1044 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1045 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1046 if (size == 1) PetscFunctionReturn(0); 1047 1048 /* Hard test: off-diagonal block. 
This takes a MatGetSubMatrix. */ 1049 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1050 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1051 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1052 for (i=0; i<first; i++) notme[i] = i; 1053 for (i=last; i<M; i++) notme[i-last+first] = i; 1054 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1055 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1056 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1057 Aoff = Aoffs[0]; 1058 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1059 Boff = Boffs[0]; 1060 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1061 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1062 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1063 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1064 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1065 ierr = PetscFree(notme);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 PetscErrorCode ierr; 1075 1076 PetscFunctionBegin; 1077 /* do nondiagonal part */ 1078 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1079 /* send it on its way */ 1080 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1081 /* do local part */ 1082 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1083 /* receive remote parts */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 /* 1089 This only works correctly for square matrices where the subblock A->A is the 1090 diagonal block 1091 */ 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1095 { 1096 PetscErrorCode ierr; 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 1099 PetscFunctionBegin; 1100 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1101 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1102 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatScale_MPIAIJ" 1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1109 { 1110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 1113 PetscFunctionBegin; 1114 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1115 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatDestroy_Redundant" 1121 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant) 1122 { 1123 PetscErrorCode ierr; 1124 Mat_Redundant *redund = *redundant; 1125 PetscInt i; 1126 1127 PetscFunctionBegin; 1128 *redundant = NULL; 1129 if (redund){ 1130 if (redund->matseq) { /* via MatGetSubMatrices() */ 1131 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 1132 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 1133 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 1134 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 1135 } else { 1136 ierr = 
PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 1137 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1138 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1139 for (i=0; i<redund->nrecvs; i++) { 1140 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1141 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1142 } 1143 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1144 } 1145 1146 if (redund->psubcomm) { 1147 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 1148 } 1149 ierr = PetscFree(redund);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatDestroy_MPIAIJ" 1156 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1157 { 1158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1159 PetscErrorCode ierr; 1160 1161 PetscFunctionBegin; 1162 #if defined(PETSC_USE_LOG) 1163 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1164 #endif 1165 ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr); 1166 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1167 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1168 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1169 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1170 #if defined(PETSC_USE_CTABLE) 1171 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1172 #else 1173 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1174 #endif 1175 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1176 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1177 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1178 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1179 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1180 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1181 1182 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1183 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1184 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1186 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1188 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1189 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1190 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1191 #if defined(PETSC_HAVE_ELEMENTAL) 1192 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1193 #endif 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1199 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1200 { 1201 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1202 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1203 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1204 PetscErrorCode ierr; 1205 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1206 int fd; 1207 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1208 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1209 PetscScalar *column_values; 1210 PetscInt message_count,flowcontrolcount; 1211 FILE *file; 1212 1213 PetscFunctionBegin; 1214 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1215 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1216 nz = A->nz + B->nz; 1217 if (!rank) { 1218 header[0] = MAT_FILE_CLASSID; 1219 header[1] = mat->rmap->N; 1220 header[2] = mat->cmap->N; 1221 1222 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1223 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1224 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1225 /* get largest number of rows any processor has */ 1226 rlen = mat->rmap->n; 1227 range = mat->rmap->range; 1228 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1229 } else { 1230 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1231 rlen = mat->rmap->n; 1232 } 1233 1234 /* load up the local row counts */ 1235 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1236 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1237 1238 /* store the row lengths to the file */ 1239 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1240 if (!rank) { 1241 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1242 for (i=1; i<size; i++) { 1243 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1244 rlen = range[i+1] - range[i]; 1245 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1246 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1247 } 1248 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1249 } else { 1250 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1251 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1252 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1253 } 1254 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1255 1256 /* load up the local column indices */ 1257 nzmax = nz; /* th processor needs space a largest processor needs */ 1258 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1259 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1260 cnt = 0; 1261 for (i=0; i<mat->rmap->n; i++) { 1262 for (j=B->i[i]; j<B->i[i+1]; j++) { 1263 if ((col = garray[B->j[j]]) > cstart) break; 1264 column_indices[cnt++] = col; 1265 } 1266 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1267 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1268 } 1269 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1270 1271 /* store the column indices to the file */ 1272 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1273 if (!rank) { 1274 MPI_Status status; 1275 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1276 for (i=1; i<size; i++) { 1277 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1278 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1279 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1280 
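      /* each worker first sends its local nonzero count (received just above) and then its column
         indices in a single long message; PetscViewerFlowControlStepMaster()/StepWorker() throttle
         how many workers are sending at any one time */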
ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1281 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1282 } 1283 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1284 } else { 1285 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1286 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1287 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1288 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1289 } 1290 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1291 1292 /* load up the local column values */ 1293 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1294 cnt = 0; 1295 for (i=0; i<mat->rmap->n; i++) { 1296 for (j=B->i[i]; j<B->i[i+1]; j++) { 1297 if (garray[B->j[j]] > cstart) break; 1298 column_values[cnt++] = B->a[j]; 1299 } 1300 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1301 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1302 } 1303 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1304 1305 /* store the column values to the file */ 1306 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1307 if (!rank) { 1308 MPI_Status status; 1309 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1310 for (i=1; i<size; i++) { 1311 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1312 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1313 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1314 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1315 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1316 } 1317 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1318 } else { 1319 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1320 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1321 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1322 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1323 } 1324 ierr = PetscFree(column_values);CHKERRQ(ierr); 1325 1326 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1327 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 #include <petscdraw.h> 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1334 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1335 { 1336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1337 PetscErrorCode ierr; 1338 PetscMPIInt rank = aij->rank,size = aij->size; 1339 PetscBool isdraw,iascii,isbinary; 1340 PetscViewer sviewer; 1341 PetscViewerFormat format; 1342 1343 PetscFunctionBegin; 1344 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1345 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1346 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1347 if (iascii) { 1348 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1349 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1350 MatInfo info; 1351 PetscBool inodes; 1352 1353 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1354 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1355 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1356 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1357 if (!inodes) { 1358 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1359 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1360 } else { 1361 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1362 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1363 } 1364 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1365 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1366 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1367 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1368 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1369 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1370 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1371 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1374 PetscInt inodecount,inodelimit,*inodes; 1375 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1376 if (inodes) { 1377 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1378 } else { 1379 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1380 } 1381 PetscFunctionReturn(0); 1382 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1383 PetscFunctionReturn(0); 1384 } 1385 } else if (isbinary) { 1386 if (size == 1) { 1387 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1388 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1389 } else { 1390 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1391 } 1392 PetscFunctionReturn(0); 1393 } else if (isdraw) { 1394 PetscDraw draw; 1395 PetscBool isnull; 1396 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1397 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1398 } 1399 1400 { 1401 /* assemble the entire matrix onto first processor. 
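          Note that this requires process 0 to hold a complete copy of the matrix, so this path is
          only practical for the ASCII, draw and socket viewers on relatively small problems.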
*/ 1402 Mat A; 1403 Mat_SeqAIJ *Aloc; 1404 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1405 MatScalar *a; 1406 1407 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1408 if (!rank) { 1409 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1410 } else { 1411 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1412 } 1413 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1414 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1415 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1416 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1417 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1418 1419 /* copy over the A part */ 1420 Aloc = (Mat_SeqAIJ*)aij->A->data; 1421 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1422 row = mat->rmap->rstart; 1423 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1424 for (i=0; i<m; i++) { 1425 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1426 row++; 1427 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1428 } 1429 aj = Aloc->j; 1430 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1431 1432 /* copy over the B part */ 1433 Aloc = (Mat_SeqAIJ*)aij->B->data; 1434 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1435 row = mat->rmap->rstart; 1436 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1437 ct = cols; 1438 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1439 for (i=0; i<m; i++) { 1440 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1441 row++; 1442 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1443 } 1444 ierr = PetscFree(ct);CHKERRQ(ierr); 1445 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1447 /* 1448 Everyone has to call to draw the matrix since the graphics waits are 1449 synchronized across all processors that share the PetscDraw object 1450 */ 1451 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1452 if (!rank) { 1453 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1454 } 1455 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1456 ierr = MatDestroy(&A);CHKERRQ(ierr); 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 #undef __FUNCT__ 1462 #define __FUNCT__ "MatView_MPIAIJ" 1463 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1464 { 1465 PetscErrorCode ierr; 1466 PetscBool iascii,isdraw,issocket,isbinary; 1467 1468 PetscFunctionBegin; 1469 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1470 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1471 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1472 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1473 if (iascii || isdraw || isbinary || issocket) { 1474 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1475 } 1476 PetscFunctionReturn(0); 1477 } 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatSOR_MPIAIJ" 1481 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1482 { 1483 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1484 PetscErrorCode ierr; 1485 Vec bb1 = 0; 1486 PetscBool hasop; 1487 1488 PetscFunctionBegin; 1489 if (flag == SOR_APPLY_UPPER) { 1490 ierr 
= (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1495 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1496 } 1497 1498 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1499 if (flag & SOR_ZERO_INITIAL_GUESS) { 1500 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1501 its--; 1502 } 1503 1504 while (its--) { 1505 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1507 1508 /* update rhs: bb1 = bb - B*x */ 1509 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1510 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1511 1512 /* local sweep */ 1513 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1514 } 1515 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1516 if (flag & SOR_ZERO_INITIAL_GUESS) { 1517 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1518 its--; 1519 } 1520 while (its--) { 1521 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1522 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 1524 /* update rhs: bb1 = bb - B*x */ 1525 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1526 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1527 1528 /* local sweep */ 1529 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1530 } 1531 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1532 if (flag & SOR_ZERO_INITIAL_GUESS) { 1533 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1534 its--; 1535 } 1536 while (its--) { 1537 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1538 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1539 1540 /* update rhs: bb1 = bb - B*x */ 1541 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1542 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1543 1544 /* local sweep */ 1545 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1546 } 1547 } else if (flag & SOR_EISENSTAT) { 1548 Vec xx1; 1549 1550 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1551 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1552 1553 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1554 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 if (!mat->diag) { 1556 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1557 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1558 } 1559 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1560 if (hasop) { 1561 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1562 } else { 1563 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1564 } 1565 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1566 1567 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1568 1569 /* local sweep */ 1570 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1571 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1572 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1573 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1574 1575 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatPermute_MPIAIJ" 1581 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1582 { 1583 Mat aA,aB,Aperm; 1584 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1585 PetscScalar *aa,*ba; 1586 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1587 PetscSF rowsf,sf; 1588 IS parcolp = NULL; 1589 PetscBool done; 1590 PetscErrorCode ierr; 1591 1592 PetscFunctionBegin; 1593 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1594 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1595 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1596 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1597 1598 /* Invert row permutation to find out where my rows should go */ 1599 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1600 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1601 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1602 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1603 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1604 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1605 1606 /* Invert column permutation to find out where my columns should go */ 1607 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1608 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1609 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1610 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1611 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1612 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1613 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1614 1615 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1616 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1617 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1618 1619 /* Find out where my gcols should go */ 1620 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1621 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1622 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1623 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1624 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1625 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1626 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1627 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1628 1629 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1630 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1631 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1632 for (i=0; i<m; i++) { 1633 PetscInt row = rdest[i],rowner; 1634 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1635 for (j=ai[i]; j<ai[i+1]; j++) { 1636 PetscInt cowner,col = cdest[aj[j]]; 1637 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1638 if (rowner == cowner) dnnz[i]++; 1639 
else onnz[i]++; 1640 } 1641 for (j=bi[i]; j<bi[i+1]; j++) { 1642 PetscInt cowner,col = gcdest[bj[j]]; 1643 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1644 if (rowner == cowner) dnnz[i]++; 1645 else onnz[i]++; 1646 } 1647 } 1648 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1649 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1650 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1651 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1652 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1653 1654 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1655 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1656 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1657 for (i=0; i<m; i++) { 1658 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1659 PetscInt j0,rowlen; 1660 rowlen = ai[i+1] - ai[i]; 1661 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1662 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1663 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1664 } 1665 rowlen = bi[i+1] - bi[i]; 1666 for (j0=j=0; j<rowlen; j0=j) { 1667 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1668 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1669 } 1670 } 1671 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1673 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1674 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1675 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1676 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1677 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1678 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1679 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1680 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1681 *B = Aperm; 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1687 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1688 { 1689 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1690 Mat A = mat->A,B = mat->B; 1691 PetscErrorCode ierr; 1692 PetscReal isend[5],irecv[5]; 1693 1694 PetscFunctionBegin; 1695 info->block_size = 1.0; 1696 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1697 1698 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1699 isend[3] = info->memory; isend[4] = info->mallocs; 1700 1701 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1702 1703 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1704 isend[3] += info->memory; isend[4] += info->mallocs; 1705 if (flag == MAT_LOCAL) { 1706 info->nz_used = isend[0]; 1707 info->nz_allocated = isend[1]; 1708 info->nz_unneeded = isend[2]; 1709 info->memory = isend[3]; 1710 info->mallocs = isend[4]; 1711 } else if (flag == MAT_GLOBAL_MAX) { 1712 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1713 1714 info->nz_used = irecv[0]; 1715 info->nz_allocated = irecv[1]; 1716 info->nz_unneeded = irecv[2]; 1717 info->memory = irecv[3]; 1718 info->mallocs = irecv[4]; 1719 } else 
if (flag == MAT_GLOBAL_SUM) { 1720 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1721 1722 info->nz_used = irecv[0]; 1723 info->nz_allocated = irecv[1]; 1724 info->nz_unneeded = irecv[2]; 1725 info->memory = irecv[3]; 1726 info->mallocs = irecv[4]; 1727 } 1728 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1729 info->fill_ratio_needed = 0; 1730 info->factor_mallocs = 0; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatSetOption_MPIAIJ" 1736 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1737 { 1738 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 switch (op) { 1743 case MAT_NEW_NONZERO_LOCATIONS: 1744 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1745 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1746 case MAT_KEEP_NONZERO_PATTERN: 1747 case MAT_NEW_NONZERO_LOCATION_ERR: 1748 case MAT_USE_INODES: 1749 case MAT_IGNORE_ZERO_ENTRIES: 1750 MatCheckPreallocated(A,1); 1751 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1752 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1753 break; 1754 case MAT_ROW_ORIENTED: 1755 a->roworiented = flg; 1756 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1759 break; 1760 case MAT_NEW_DIAGONALS: 1761 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1762 break; 1763 case MAT_IGNORE_OFF_PROC_ENTRIES: 1764 a->donotstash = flg; 1765 break; 1766 case MAT_SPD: 1767 A->spd_set = PETSC_TRUE; 1768 A->spd = flg; 1769 if (flg) { 1770 A->symmetric = PETSC_TRUE; 1771 A->structurally_symmetric = PETSC_TRUE; 1772 A->symmetric_set = PETSC_TRUE; 1773 A->structurally_symmetric_set = PETSC_TRUE; 1774 } 1775 break; 1776 case MAT_SYMMETRIC: 1777 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1778 break; 1779 case MAT_STRUCTURALLY_SYMMETRIC: 1780 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1781 break; 1782 case MAT_HERMITIAN: 1783 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1784 break; 1785 case MAT_SYMMETRY_ETERNAL: 1786 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1787 break; 1788 default: 1789 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1790 } 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatGetRow_MPIAIJ" 1796 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1797 { 1798 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1799 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1800 PetscErrorCode ierr; 1801 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1802 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1803 PetscInt *cmap,*idx_p; 1804 1805 PetscFunctionBegin; 1806 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1807 mat->getrowactive = PETSC_TRUE; 1808 1809 if (!mat->rowvalues && (idx || v)) { 1810 /* 1811 allocate enough space to hold information from the longest row. 
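         Editor's note (added explanation; not part of the original source): mat->rowvalues and
         mat->rowindices are scratch arrays sized by the largest combined row length of the
         diagonal block A and the off-diagonal block B, read off their CSR row pointers, so a
         single allocation serves every later MatGetRow() call on this matrix. Callers reach
         this through the public pair, e.g. for a row owned by this process:

             ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
             ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);

         where cols and vals are const pointers returned by the library, valid only until the
         matching MatRestoreRow().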
1812 */ 1813 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1814 PetscInt max = 1,tmp; 1815 for (i=0; i<matin->rmap->n; i++) { 1816 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1817 if (max < tmp) max = tmp; 1818 } 1819 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1820 } 1821 1822 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1823 lrow = row - rstart; 1824 1825 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1826 if (!v) {pvA = 0; pvB = 0;} 1827 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1828 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1829 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1830 nztot = nzA + nzB; 1831 1832 cmap = mat->garray; 1833 if (v || idx) { 1834 if (nztot) { 1835 /* Sort by increasing column numbers, assuming A and B already sorted */ 1836 PetscInt imark = -1; 1837 if (v) { 1838 *v = v_p = mat->rowvalues; 1839 for (i=0; i<nzB; i++) { 1840 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1841 else break; 1842 } 1843 imark = i; 1844 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1845 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1846 } 1847 if (idx) { 1848 *idx = idx_p = mat->rowindices; 1849 if (imark > -1) { 1850 for (i=0; i<imark; i++) { 1851 idx_p[i] = cmap[cworkB[i]]; 1852 } 1853 } else { 1854 for (i=0; i<nzB; i++) { 1855 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1856 else break; 1857 } 1858 imark = i; 1859 } 1860 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1861 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1862 } 1863 } else { 1864 if (idx) *idx = 0; 1865 if (v) *v = 0; 1866 } 1867 } 1868 *nz = nztot; 1869 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1870 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1876 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1877 { 1878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1879 1880 PetscFunctionBegin; 1881 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1882 aij->getrowactive = PETSC_FALSE; 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatNorm_MPIAIJ" 1888 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1889 { 1890 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1891 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1892 PetscErrorCode ierr; 1893 PetscInt i,j,cstart = mat->cmap->rstart; 1894 PetscReal sum = 0.0; 1895 MatScalar *v; 1896 1897 PetscFunctionBegin; 1898 if (aij->size == 1) { 1899 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1900 } else { 1901 if (type == NORM_FROBENIUS) { 1902 v = amat->a; 1903 for (i=0; i<amat->nz; i++) { 1904 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1905 } 1906 v = bmat->a; 1907 for (i=0; i<bmat->nz; i++) { 1908 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1909 } 1910 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1911 *norm = PetscSqrtReal(*norm); 1912 } else if (type == NORM_1) { /* max column norm */ 1913 PetscReal *tmp,*tmp2; 1914 PetscInt *jj,*garray = aij->garray; 1915 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1916 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1917 *norm = 0.0; 1918 v = amat->a; jj = amat->j; 1919 for (j=0; j<amat->nz; j++) { 1920 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1921 } 1922 v = bmat->a; jj = bmat->j; 1923 for (j=0; j<bmat->nz; j++) { 1924 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1925 } 1926 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1927 for (j=0; j<mat->cmap->N; j++) { 1928 if (tmp2[j] > *norm) *norm = tmp2[j]; 1929 } 1930 ierr = PetscFree(tmp);CHKERRQ(ierr); 1931 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1932 } else if (type == NORM_INFINITY) { /* max row norm */ 1933 PetscReal ntemp = 0.0; 1934 for (j=0; j<aij->A->rmap->n; j++) { 1935 v = amat->a + amat->i[j]; 1936 sum = 0.0; 1937 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1938 sum += PetscAbsScalar(*v); v++; 1939 } 1940 v = bmat->a + bmat->i[j]; 1941 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1942 sum += PetscAbsScalar(*v); v++; 1943 } 1944 if (sum > ntemp) ntemp = sum; 1945 } 1946 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1947 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1948 } 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatTranspose_MPIAIJ" 1954 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1955 { 1956 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1957 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1958 PetscErrorCode ierr; 1959 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1960 PetscInt cstart = A->cmap->rstart,ncol; 1961 Mat B; 1962 MatScalar *array; 1963 1964 PetscFunctionBegin; 1965 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1966 1967 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1968 ai = Aloc->i; aj = Aloc->j; 1969 bi = Bloc->i; bj = Bloc->j; 1970 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1971 PetscInt *d_nnz,*g_nnz,*o_nnz; 1972 PetscSFNode *oloc; 1973 PETSC_UNUSED PetscSF sf; 1974 1975 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1976 /* compute d_nnz for preallocation */ 1977 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1978 for (i=0; i<ai[ma]; i++) { 1979 d_nnz[aj[i]]++; 1980 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1981 } 1982 /* compute local off-diagonal contributions */ 1983 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1984 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1985 /* map those to global */ 1986 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1987 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1988 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1989 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1990 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1991 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1992 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1993 1994 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1995 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1996 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1997 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1998 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1999 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2000 } else { 2001 B = *matout; 2002 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2003 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2004 } 2005 2006 /* copy over the A part */ 2007 array = Aloc->a; 2008 row = A->rmap->rstart; 2009 for (i=0; i<ma; i++) { 2010 ncol = ai[i+1]-ai[i]; 2011 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2012 row++; 2013 array += ncol; aj += ncol; 2014 } 2015 aj = Aloc->j; 2016 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2017 2018 /* copy over the B part */ 2019 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2020 array = Bloc->a; 2021 row = A->rmap->rstart; 2022 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2023 cols_tmp = cols; 2024 for (i=0; i<mb; i++) { 2025 ncol = bi[i+1]-bi[i]; 2026 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2027 row++; 2028 array += ncol; cols_tmp += ncol; 2029 } 2030 ierr = PetscFree(cols);CHKERRQ(ierr); 2031 2032 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2033 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2034 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2035 *matout = B; 2036 } else { 2037 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2038 } 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2044 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2045 { 2046 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2047 Mat a = aij->A,b = aij->B; 2048 PetscErrorCode ierr; 2049 PetscInt s1,s2,s3; 2050 2051 PetscFunctionBegin; 2052 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2053 if (rr) { 2054 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2055 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2056 /* Overlap communication with computation. 
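       Editor's note (added explanation; not part of the original source): the scatter of the
       right-scaling vector rr into the ghosted work vector aij->lvec is only started here; the
       left scaling of the off-diagonal block and the scaling of the diagonal block are carried
       out while those messages are in flight, and the scatter is completed further below before
       the off-diagonal block is right-scaled with the received ghost values.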
*/ 2057 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2058 } 2059 if (ll) { 2060 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2061 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2062 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2063 } 2064 /* scale the diagonal block */ 2065 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2066 2067 if (rr) { 2068 /* Do a scatter end and then right scale the off-diagonal block */ 2069 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2070 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2077 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2078 { 2079 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2080 PetscErrorCode ierr; 2081 2082 PetscFunctionBegin; 2083 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatEqual_MPIAIJ" 2089 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2090 { 2091 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2092 Mat a,b,c,d; 2093 PetscBool flg; 2094 PetscErrorCode ierr; 2095 2096 PetscFunctionBegin; 2097 a = matA->A; b = matA->B; 2098 c = matB->A; d = matB->B; 2099 2100 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2101 if (flg) { 2102 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2103 } 2104 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "MatCopy_MPIAIJ" 2110 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2111 { 2112 PetscErrorCode ierr; 2113 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2114 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2115 2116 PetscFunctionBegin; 2117 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2118 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2119 /* because of the column compression in the off-processor part of the matrix a->B, 2120 the number of columns in a->B and b->B may be different, hence we cannot call 2121 the MatCopy() directly on the two parts. If need be, we can provide a more 2122 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2123 then copying the submatrices */ 2124 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2125 } else { 2126 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2127 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2128 } 2129 PetscFunctionReturn(0); 2130 } 2131 2132 #undef __FUNCT__ 2133 #define __FUNCT__ "MatSetUp_MPIAIJ" 2134 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2135 { 2136 PetscErrorCode ierr; 2137 2138 PetscFunctionBegin; 2139 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2140 PetscFunctionReturn(0); 2141 } 2142 2143 /* 2144 Computes the number of nonzeros per row needed for preallocation when X and Y 2145 have different nonzero structure. 
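    Editor's note (added explanation; not part of the original source): for each of the m rows
    the routine merges the sorted column lists of X and Y, comparing global column numbers
    obtained through the local-to-global maps xltog and yltog, and stores the size of their
    union in nnz[i]: Y-only columns are counted while catching up to the current X column, a
    column present in both is counted once, and any Y columns remaining after the X row is
    exhausted are appended. nnz must provide at least m entries on entry.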
2146 */ 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2149 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2150 { 2151 PetscInt i,j,k,nzx,nzy; 2152 2153 PetscFunctionBegin; 2154 /* Set the number of nonzeros in the new matrix */ 2155 for (i=0; i<m; i++) { 2156 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2157 nzx = xi[i+1] - xi[i]; 2158 nzy = yi[i+1] - yi[i]; 2159 nnz[i] = 0; 2160 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2161 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2162 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2163 nnz[i]++; 2164 } 2165 for (; k<nzy; k++) nnz[i]++; 2166 } 2167 PetscFunctionReturn(0); 2168 } 2169 2170 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2173 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2174 { 2175 PetscErrorCode ierr; 2176 PetscInt m = Y->rmap->N; 2177 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2178 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2179 2180 PetscFunctionBegin; 2181 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2182 PetscFunctionReturn(0); 2183 } 2184 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatAXPY_MPIAIJ" 2187 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2188 { 2189 PetscErrorCode ierr; 2190 PetscInt i; 2191 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2192 PetscBLASInt bnz,one=1; 2193 Mat_SeqAIJ *x,*y; 2194 2195 PetscFunctionBegin; 2196 if (str == SAME_NONZERO_PATTERN) { 2197 PetscScalar alpha = a; 2198 x = (Mat_SeqAIJ*)xx->A->data; 2199 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2200 y = (Mat_SeqAIJ*)yy->A->data; 2201 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2202 x = (Mat_SeqAIJ*)xx->B->data; 2203 y = (Mat_SeqAIJ*)yy->B->data; 2204 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2205 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2206 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2207 } else if (str == SUBSET_NONZERO_PATTERN) { 2208 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2209 2210 x = (Mat_SeqAIJ*)xx->B->data; 2211 y = (Mat_SeqAIJ*)yy->B->data; 2212 if (y->xtoy && y->XtoY != xx->B) { 2213 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2214 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2215 } 2216 if (!y->xtoy) { /* get xtoy */ 2217 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2218 y->XtoY = xx->B; 2219 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2220 } 2221 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2222 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2223 } else { 2224 Mat B; 2225 PetscInt *nnz_d,*nnz_o; 2226 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2227 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2228 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2229 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2230 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2231 
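  /*
     Editor's note (sketch, not part of the original source): in this DIFFERENT_NONZERO_PATTERN
     branch a fresh matrix B with the same layout as Y is preallocated with row counts large
     enough for the union of the nonzero patterns of Y and a*X (nnz_d for the diagonal block,
     nnz_o for the off-diagonal block, the latter computed with the garray local-to-global maps
     because the two off-diagonal blocks are compressed differently); MatAXPY_BasicWithPreallocation()
     then accumulates Y + a*X into B and MatHeaderReplace() swaps B's contents into Y so the
     caller's handle stays valid. From user code all of this is reached through the public call

         ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

     where SUBSET_NONZERO_PATTERN or SAME_NONZERO_PATTERN may be passed instead when X's pattern
     is contained in, respectively identical to, Y's, avoiding the rebuild done here.
  */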
ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2232 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2233 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2234 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2235 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2236 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2237 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2238 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2239 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2240 } 2241 PetscFunctionReturn(0); 2242 } 2243 2244 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2245 2246 #undef __FUNCT__ 2247 #define __FUNCT__ "MatConjugate_MPIAIJ" 2248 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2249 { 2250 #if defined(PETSC_USE_COMPLEX) 2251 PetscErrorCode ierr; 2252 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2253 2254 PetscFunctionBegin; 2255 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2256 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2257 #else 2258 PetscFunctionBegin; 2259 #endif 2260 PetscFunctionReturn(0); 2261 } 2262 2263 #undef __FUNCT__ 2264 #define __FUNCT__ "MatRealPart_MPIAIJ" 2265 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2266 { 2267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2268 PetscErrorCode ierr; 2269 2270 PetscFunctionBegin; 2271 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2272 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2273 PetscFunctionReturn(0); 2274 } 2275 2276 #undef __FUNCT__ 2277 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2278 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2279 { 2280 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2281 PetscErrorCode ierr; 2282 2283 PetscFunctionBegin; 2284 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2285 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2286 PetscFunctionReturn(0); 2287 } 2288 2289 #if defined(PETSC_HAVE_PBGL) 2290 2291 #include <boost/parallel/mpi/bsp_process_group.hpp> 2292 #include <boost/graph/distributed/ilu_default_graph.hpp> 2293 #include <boost/graph/distributed/ilu_0_block.hpp> 2294 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2295 #include <boost/graph/distributed/petsc/interface.hpp> 2296 #include <boost/multi_array.hpp> 2297 #include <boost/parallel/distributed_property_map->hpp> 2298 2299 #undef __FUNCT__ 2300 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2301 /* 2302 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2303 */ 2304 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2305 { 2306 namespace petsc = boost::distributed::petsc; 2307 2308 namespace graph_dist = boost::graph::distributed; 2309 using boost::graph::distributed::ilu_default::process_group_type; 2310 using boost::graph::ilu_permuted; 2311 2312 PetscBool row_identity, col_identity; 2313 PetscContainer c; 2314 PetscInt m, n, M, N; 2315 PetscErrorCode ierr; 2316 2317 PetscFunctionBegin; 2318 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2319 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2320 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2321 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2322 2323 process_group_type pg; 2324 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2325 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2326 lgraph_type& level_graph = *lgraph_p; 2327 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2328 2329 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2330 ilu_permuted(level_graph); 2331 2332 /* put together the new matrix */ 2333 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2334 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2335 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2336 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2337 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2338 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2339 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2340 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2341 2342 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2343 ierr = PetscContainerSetPointer(c, lgraph_p); 2344 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2345 ierr = PetscContainerDestroy(&c); 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2351 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2352 { 2353 PetscFunctionBegin; 2354 PetscFunctionReturn(0); 2355 } 2356 2357 #undef __FUNCT__ 2358 #define __FUNCT__ "MatSolve_MPIAIJ" 2359 /* 2360 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2361 */ 2362 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2363 { 2364 namespace graph_dist = boost::graph::distributed; 2365 2366 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2367 lgraph_type *lgraph_p; 2368 PetscContainer c; 2369 PetscErrorCode ierr; 2370 2371 PetscFunctionBegin; 2372 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2373 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2374 ierr = VecCopy(b, x);CHKERRQ(ierr); 2375 2376 PetscScalar *array_x; 2377 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2378 PetscInt sx; 2379 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2380 2381 PetscScalar *array_b; 2382 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2383 PetscInt sb; 2384 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2385 2386 lgraph_type& level_graph = *lgraph_p; 2387 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2388 2389 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2390 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2391 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2392 2393 typedef boost::iterator_property_map<array_ref_type::iterator, 2394 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2395 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2396 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2397 2398 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2399 PetscFunctionReturn(0); 2400 } 2401 #endif 2402 2403 2404 #undef __FUNCT__ 2405 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2406 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2407 { 2408 PetscMPIInt rank,size; 2409 MPI_Comm comm; 2410 PetscErrorCode ierr; 2411 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2412 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2413 PetscInt *rowrange = 
mat->rmap->range; 2414 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2415 Mat A = aij->A,B=aij->B,C=*matredundant; 2416 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2417 PetscScalar *sbuf_a; 2418 PetscInt nzlocal=a->nz+b->nz; 2419 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2420 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2421 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2422 MatScalar *aworkA,*aworkB; 2423 PetscScalar *vals; 2424 PetscMPIInt tag1,tag2,tag3,imdex; 2425 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2426 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2427 MPI_Status recv_status,*send_status; 2428 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2429 PetscInt **rbuf_j=NULL; 2430 PetscScalar **rbuf_a=NULL; 2431 Mat_Redundant *redund =NULL; 2432 2433 PetscFunctionBegin; 2434 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2435 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2436 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2437 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2438 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2439 2440 if (reuse == MAT_REUSE_MATRIX) { 2441 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2442 if (subsize == 1) { 2443 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2444 redund = c->redundant; 2445 } else { 2446 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2447 redund = c->redundant; 2448 } 2449 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2450 2451 nsends = redund->nsends; 2452 nrecvs = redund->nrecvs; 2453 send_rank = redund->send_rank; 2454 recv_rank = redund->recv_rank; 2455 sbuf_nz = redund->sbuf_nz; 2456 rbuf_nz = redund->rbuf_nz; 2457 sbuf_j = redund->sbuf_j; 2458 sbuf_a = redund->sbuf_a; 2459 rbuf_j = redund->rbuf_j; 2460 rbuf_a = redund->rbuf_a; 2461 } 2462 2463 if (reuse == MAT_INITIAL_MATRIX) { 2464 PetscInt nleftover,np_subcomm; 2465 2466 /* get the destination processors' id send_rank, nsends and nrecvs */ 2467 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2468 2469 np_subcomm = size/nsubcomm; 2470 nleftover = size - nsubcomm*np_subcomm; 2471 2472 /* block of codes below is specific for INTERLACED */ 2473 /* ------------------------------------------------*/ 2474 nsends = 0; nrecvs = 0; 2475 for (i=0; i<size; i++) { 2476 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2477 send_rank[nsends++] = i; 2478 recv_rank[nrecvs++] = i; 2479 } 2480 } 2481 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2482 i = size-nleftover-1; 2483 j = 0; 2484 while (j < nsubcomm - nleftover) { 2485 send_rank[nsends++] = i; 2486 i--; j++; 2487 } 2488 } 2489 2490 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2491 for (i=0; i<nleftover; i++) { 2492 recv_rank[nrecvs++] = size-nleftover+i; 2493 } 2494 } 2495 /*----------------------------------------------*/ 2496 2497 /* allocate sbuf_j, sbuf_a */ 2498 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2499 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2500 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2501 /* 2502 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2503 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2504 */ 
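  /*
     Editor's note (sketch, not part of the original source): sbuf_j is laid out as m+1 row
     pointers for this process's m = rend-rstart local rows followed by the global column
     indices of all local nonzeros, and sbuf_a holds the matching values; the copy loops that
     follow fill the buffers row by row with the off-diagonal entries left of cstart first,
     then the diagonal block, then the remaining off-diagonal entries, so each row comes out
     with increasing column numbers. Callers never see these buffers; assuming A is an
     assembled MATMPIAIJ matrix and nsub redundant copies are wanted, the public entry point
     in this version of PETSc is

         Mat Cred;
         ierr = MatGetRedundantMatrix(A,nsub,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);

     (nsub and Cred are hypothetical names); passing MPI_COMM_NULL lets PETSc build the
     subcommunicators itself.
  */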
2505 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2506 2507 /* copy mat's local entries into the buffers */ 2508 if (reuse == MAT_INITIAL_MATRIX) { 2509 rownz_max = 0; 2510 rptr = sbuf_j; 2511 cols = sbuf_j + rend-rstart + 1; 2512 vals = sbuf_a; 2513 rptr[0] = 0; 2514 for (i=0; i<rend-rstart; i++) { 2515 row = i + rstart; 2516 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2517 ncols = nzA + nzB; 2518 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2519 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2520 /* load the column indices for this row into cols */ 2521 lwrite = 0; 2522 for (l=0; l<nzB; l++) { 2523 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2524 vals[lwrite] = aworkB[l]; 2525 cols[lwrite++] = ctmp; 2526 } 2527 } 2528 for (l=0; l<nzA; l++) { 2529 vals[lwrite] = aworkA[l]; 2530 cols[lwrite++] = cstart + cworkA[l]; 2531 } 2532 for (l=0; l<nzB; l++) { 2533 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2534 vals[lwrite] = aworkB[l]; 2535 cols[lwrite++] = ctmp; 2536 } 2537 } 2538 vals += ncols; 2539 cols += ncols; 2540 rptr[i+1] = rptr[i] + ncols; 2541 if (rownz_max < ncols) rownz_max = ncols; 2542 } 2543 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2544 } else { /* only copy matrix values into sbuf_a */ 2545 rptr = sbuf_j; 2546 vals = sbuf_a; 2547 rptr[0] = 0; 2548 for (i=0; i<rend-rstart; i++) { 2549 row = i + rstart; 2550 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2551 ncols = nzA + nzB; 2552 cworkB = b->j + b->i[i]; 2553 aworkA = a->a + a->i[i]; 2554 aworkB = b->a + b->i[i]; 2555 lwrite = 0; 2556 for (l=0; l<nzB; l++) { 2557 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2558 } 2559 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2560 for (l=0; l<nzB; l++) { 2561 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2562 } 2563 vals += ncols; 2564 rptr[i+1] = rptr[i] + ncols; 2565 } 2566 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2567 2568 /* send nzlocal to others, and recv other's nzlocal */ 2569 /*--------------------------------------------------*/ 2570 if (reuse == MAT_INITIAL_MATRIX) { 2571 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2572 2573 s_waits2 = s_waits3 + nsends; 2574 s_waits1 = s_waits2 + nsends; 2575 r_waits1 = s_waits1 + nsends; 2576 r_waits2 = r_waits1 + nrecvs; 2577 r_waits3 = r_waits2 + nrecvs; 2578 } else { 2579 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2580 2581 r_waits3 = s_waits3 + nsends; 2582 } 2583 2584 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2585 if (reuse == MAT_INITIAL_MATRIX) { 2586 /* get new tags to keep the communication clean */ 2587 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2588 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2589 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2590 2591 /* post receives of other's nzlocal */ 2592 for (i=0; i<nrecvs; i++) { 2593 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2594 } 2595 /* send nzlocal to others */ 2596 for (i=0; i<nsends; i++) { 2597 sbuf_nz[i] = nzlocal; 2598 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2599 } 2600 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2601 count = nrecvs; 2602 while (count) { 2603 ierr = 
MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2604 2605 recv_rank[imdex] = recv_status.MPI_SOURCE; 2606 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2607 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2608 2609 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2610 2611 rbuf_nz[imdex] += i + 2; 2612 2613 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2614 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2615 count--; 2616 } 2617 /* wait on sends of nzlocal */ 2618 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2619 /* send mat->i,j to others, and recv from other's */ 2620 /*------------------------------------------------*/ 2621 for (i=0; i<nsends; i++) { 2622 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2623 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2624 } 2625 /* wait on receives of mat->i,j */ 2626 /*------------------------------*/ 2627 count = nrecvs; 2628 while (count) { 2629 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2630 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2631 count--; 2632 } 2633 /* wait on sends of mat->i,j */ 2634 /*---------------------------*/ 2635 if (nsends) { 2636 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2637 } 2638 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2639 2640 /* post receives, send and receive mat->a */ 2641 /*----------------------------------------*/ 2642 for (imdex=0; imdex<nrecvs; imdex++) { 2643 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2644 } 2645 for (i=0; i<nsends; i++) { 2646 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2647 } 2648 count = nrecvs; 2649 while (count) { 2650 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2651 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2652 count--; 2653 } 2654 if (nsends) { 2655 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2656 } 2657 2658 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2659 2660 /* create redundant matrix */ 2661 /*-------------------------*/ 2662 if (reuse == MAT_INITIAL_MATRIX) { 2663 const PetscInt *range; 2664 PetscInt rstart_sub,rend_sub,mloc_sub; 2665 2666 /* compute rownz_max for preallocation */ 2667 for (imdex=0; imdex<nrecvs; imdex++) { 2668 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2669 rptr = rbuf_j[imdex]; 2670 for (i=0; i<j; i++) { 2671 ncols = rptr[i+1] - rptr[i]; 2672 if (rownz_max < ncols) rownz_max = ncols; 2673 } 2674 } 2675 2676 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2677 2678 /* get local size of redundant matrix 2679 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
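       Editor's note (added explanation; not part of the original source): with the interlaced
       layout, the process with rank subrank in the subcommunicator takes over the rows that the
       nsubcomm consecutive global ranks nsubcomm*subrank through nsubcomm*(subrank+1)-1 owned
       in the original matrix, so mloc_sub is simply the difference of the corresponding entries
       of the ownership-range array; the last process of each subcommunicator also absorbs any
       leftover rows up to mat->rmap->N.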
*/ 2680 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2681 rstart_sub = range[nsubcomm*subrank]; 2682 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2683 rend_sub = range[nsubcomm*(subrank+1)]; 2684 } else { 2685 rend_sub = mat->rmap->N; 2686 } 2687 mloc_sub = rend_sub - rstart_sub; 2688 2689 if (M == N) { 2690 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2691 } else { /* non-square matrix */ 2692 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2693 } 2694 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2695 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2696 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2697 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2698 } else { 2699 C = *matredundant; 2700 } 2701 2702 /* insert local matrix entries */ 2703 rptr = sbuf_j; 2704 cols = sbuf_j + rend-rstart + 1; 2705 vals = sbuf_a; 2706 for (i=0; i<rend-rstart; i++) { 2707 row = i + rstart; 2708 ncols = rptr[i+1] - rptr[i]; 2709 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2710 vals += ncols; 2711 cols += ncols; 2712 } 2713 /* insert received matrix entries */ 2714 for (imdex=0; imdex<nrecvs; imdex++) { 2715 rstart = rowrange[recv_rank[imdex]]; 2716 rend = rowrange[recv_rank[imdex]+1]; 2717 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2718 rptr = rbuf_j[imdex]; 2719 cols = rbuf_j[imdex] + rend-rstart + 1; 2720 vals = rbuf_a[imdex]; 2721 for (i=0; i<rend-rstart; i++) { 2722 row = i + rstart; 2723 ncols = rptr[i+1] - rptr[i]; 2724 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2725 vals += ncols; 2726 cols += ncols; 2727 } 2728 } 2729 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2730 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2731 2732 if (reuse == MAT_INITIAL_MATRIX) { 2733 *matredundant = C; 2734 2735 /* create a supporting struct and attach it to C for reuse */ 2736 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2737 if (subsize == 1) { 2738 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2739 c->redundant = redund; 2740 } else { 2741 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2742 c->redundant = redund; 2743 } 2744 2745 redund->nzlocal = nzlocal; 2746 redund->nsends = nsends; 2747 redund->nrecvs = nrecvs; 2748 redund->send_rank = send_rank; 2749 redund->recv_rank = recv_rank; 2750 redund->sbuf_nz = sbuf_nz; 2751 redund->rbuf_nz = rbuf_nz; 2752 redund->sbuf_j = sbuf_j; 2753 redund->sbuf_a = sbuf_a; 2754 redund->rbuf_j = rbuf_j; 2755 redund->rbuf_a = rbuf_a; 2756 redund->psubcomm = NULL; 2757 } 2758 PetscFunctionReturn(0); 2759 } 2760 2761 #undef __FUNCT__ 2762 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2763 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2764 { 2765 PetscErrorCode ierr; 2766 MPI_Comm comm; 2767 PetscMPIInt size,subsize; 2768 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2769 Mat_Redundant *redund=NULL; 2770 PetscSubcomm psubcomm=NULL; 2771 MPI_Comm subcomm_in=subcomm; 2772 Mat *matseq; 2773 IS isrow,iscol; 2774 2775 PetscFunctionBegin; 2776 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2777 if (reuse == MAT_INITIAL_MATRIX) { 2778 /* create psubcomm, then get subcomm */ 2779 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2780 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2781 if (nsubcomm < 1 || nsubcomm 
> size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2782 2783 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2784 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2785 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2786 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2787 subcomm = psubcomm->comm; 2788 } else { /* retrieve psubcomm and subcomm */ 2789 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2790 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2791 if (subsize == 1) { 2792 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2793 redund = c->redundant; 2794 } else { 2795 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2796 redund = c->redundant; 2797 } 2798 psubcomm = redund->psubcomm; 2799 } 2800 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2801 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2802 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2803 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2804 if (subsize == 1) { 2805 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2806 c->redundant->psubcomm = psubcomm; 2807 } else { 2808 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2809 c->redundant->psubcomm = psubcomm ; 2810 } 2811 } 2812 PetscFunctionReturn(0); 2813 } 2814 } 2815 2816 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2817 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2818 if (reuse == MAT_INITIAL_MATRIX) { 2819 /* create a local sequential matrix matseq[0] */ 2820 mloc_sub = PETSC_DECIDE; 2821 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2822 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2823 rstart = rend - mloc_sub; 2824 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2825 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2826 } else { /* reuse == MAT_REUSE_MATRIX */ 2827 if (subsize == 1) { 2828 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2829 redund = c->redundant; 2830 } else { 2831 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2832 redund = c->redundant; 2833 } 2834 2835 isrow = redund->isrow; 2836 iscol = redund->iscol; 2837 matseq = redund->matseq; 2838 } 2839 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2840 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2841 2842 if (reuse == MAT_INITIAL_MATRIX) { 2843 /* create a supporting struct and attach it to C for reuse */ 2844 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2845 if (subsize == 1) { 2846 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2847 c->redundant = redund; 2848 } else { 2849 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2850 c->redundant = redund; 2851 } 2852 redund->isrow = isrow; 2853 redund->iscol = iscol; 2854 redund->matseq = matseq; 2855 redund->psubcomm = psubcomm; 2856 } 2857 PetscFunctionReturn(0); 2858 } 2859 2860 #undef __FUNCT__ 2861 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2862 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2863 { 2864 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2865 PetscErrorCode ierr; 2866 PetscInt i,*idxb = 0; 2867 PetscScalar *va,*vb; 2868 Vec vtmp; 2869 2870 
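  /*
     Editor's note (sketch, not part of the original source): the body below computes the
     per-row maximum absolute value of the diagonal block a->A directly into v (shifting the
     returned column positions by cmap->rstart to make them global), repeats the computation
     for the off-diagonal block a->B into a temporary sequential vector, and keeps whichever of
     the two is larger, mapping a winning off-diagonal column through a->garray. No
     communication is needed since every row is fully owned by one process. A minimal call from
     user code, assuming A is an assembled MATMPIAIJ matrix:

         Vec      rmax;
         PetscInt m,*cols;
         ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
         ierr = MatCreateVecs(A,NULL,&rmax);CHKERRQ(ierr);
         ierr = PetscMalloc1(m,&cols);CHKERRQ(ierr);
         ierr = MatGetRowMaxAbs(A,rmax,cols);CHKERRQ(ierr);

     followed by PetscFree(cols) and VecDestroy(&rmax) when done; cols may be passed as NULL
     when the column locations are not needed.
  */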
PetscFunctionBegin; 2871 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2872 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2873 if (idx) { 2874 for (i=0; i<A->rmap->n; i++) { 2875 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2876 } 2877 } 2878 2879 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2880 if (idx) { 2881 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2882 } 2883 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2884 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2885 2886 for (i=0; i<A->rmap->n; i++) { 2887 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2888 va[i] = vb[i]; 2889 if (idx) idx[i] = a->garray[idxb[i]]; 2890 } 2891 } 2892 2893 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2894 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2895 ierr = PetscFree(idxb);CHKERRQ(ierr); 2896 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2897 PetscFunctionReturn(0); 2898 } 2899 2900 #undef __FUNCT__ 2901 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2902 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2903 { 2904 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2905 PetscErrorCode ierr; 2906 PetscInt i,*idxb = 0; 2907 PetscScalar *va,*vb; 2908 Vec vtmp; 2909 2910 PetscFunctionBegin; 2911 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2912 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2913 if (idx) { 2914 for (i=0; i<A->cmap->n; i++) { 2915 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2916 } 2917 } 2918 2919 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2920 if (idx) { 2921 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2922 } 2923 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2924 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2925 2926 for (i=0; i<A->rmap->n; i++) { 2927 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2928 va[i] = vb[i]; 2929 if (idx) idx[i] = a->garray[idxb[i]]; 2930 } 2931 } 2932 2933 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2934 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2935 ierr = PetscFree(idxb);CHKERRQ(ierr); 2936 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2937 PetscFunctionReturn(0); 2938 } 2939 2940 #undef __FUNCT__ 2941 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2942 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2943 { 2944 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2945 PetscInt n = A->rmap->n; 2946 PetscInt cstart = A->cmap->rstart; 2947 PetscInt *cmap = mat->garray; 2948 PetscInt *diagIdx, *offdiagIdx; 2949 Vec diagV, offdiagV; 2950 PetscScalar *a, *diagA, *offdiagA; 2951 PetscInt r; 2952 PetscErrorCode ierr; 2953 2954 PetscFunctionBegin; 2955 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2956 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2957 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2958 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2959 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2960 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2961 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2962 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2963 for (r = 0; r < n; ++r) { 2964 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2965 a[r] = diagA[r]; 2966 idx[r] = cstart + diagIdx[r]; 2967 } else { 2968 a[r] = offdiagA[r]; 2969 idx[r] = cmap[offdiagIdx[r]]; 2970 } 2971 } 2972 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2973 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2974 ierr = VecRestoreArray(offdiagV, 
&offdiagA);CHKERRQ(ierr); 2975 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2976 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2977 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2978 PetscFunctionReturn(0); 2979 } 2980 2981 #undef __FUNCT__ 2982 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2983 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2984 { 2985 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2986 PetscInt n = A->rmap->n; 2987 PetscInt cstart = A->cmap->rstart; 2988 PetscInt *cmap = mat->garray; 2989 PetscInt *diagIdx, *offdiagIdx; 2990 Vec diagV, offdiagV; 2991 PetscScalar *a, *diagA, *offdiagA; 2992 PetscInt r; 2993 PetscErrorCode ierr; 2994 2995 PetscFunctionBegin; 2996 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2997 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2998 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2999 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3000 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3001 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3002 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3003 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3004 for (r = 0; r < n; ++r) { 3005 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3006 a[r] = diagA[r]; 3007 idx[r] = cstart + diagIdx[r]; 3008 } else { 3009 a[r] = offdiagA[r]; 3010 idx[r] = cmap[offdiagIdx[r]]; 3011 } 3012 } 3013 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3014 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3015 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3016 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3017 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3018 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3019 PetscFunctionReturn(0); 3020 } 3021 3022 #undef __FUNCT__ 3023 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3024 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3025 { 3026 PetscErrorCode ierr; 3027 Mat *dummy; 3028 3029 PetscFunctionBegin; 3030 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3031 *newmat = *dummy; 3032 ierr = PetscFree(dummy);CHKERRQ(ierr); 3033 PetscFunctionReturn(0); 3034 } 3035 3036 #undef __FUNCT__ 3037 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3038 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3039 { 3040 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3041 PetscErrorCode ierr; 3042 3043 PetscFunctionBegin; 3044 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3045 PetscFunctionReturn(0); 3046 } 3047 3048 #undef __FUNCT__ 3049 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3050 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3051 { 3052 PetscErrorCode ierr; 3053 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3054 3055 PetscFunctionBegin; 3056 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3057 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3058 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3059 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 PetscFunctionReturn(0); 3061 } 3062 3063 /* -------------------------------------------------------------------*/ 3064 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3065 MatGetRow_MPIAIJ, 3066 MatRestoreRow_MPIAIJ, 3067 MatMult_MPIAIJ, 3068 /* 4*/ MatMultAdd_MPIAIJ, 3069 MatMultTranspose_MPIAIJ, 3070 MatMultTransposeAdd_MPIAIJ, 3071 #if defined(PETSC_HAVE_PBGL) 3072 MatSolve_MPIAIJ, 3073 #else 3074 0, 3075 #endif 3076 0, 3077 
0, 3078 /*10*/ 0, 3079 0, 3080 0, 3081 MatSOR_MPIAIJ, 3082 MatTranspose_MPIAIJ, 3083 /*15*/ MatGetInfo_MPIAIJ, 3084 MatEqual_MPIAIJ, 3085 MatGetDiagonal_MPIAIJ, 3086 MatDiagonalScale_MPIAIJ, 3087 MatNorm_MPIAIJ, 3088 /*20*/ MatAssemblyBegin_MPIAIJ, 3089 MatAssemblyEnd_MPIAIJ, 3090 MatSetOption_MPIAIJ, 3091 MatZeroEntries_MPIAIJ, 3092 /*24*/ MatZeroRows_MPIAIJ, 3093 0, 3094 #if defined(PETSC_HAVE_PBGL) 3095 0, 3096 #else 3097 0, 3098 #endif 3099 0, 3100 0, 3101 /*29*/ MatSetUp_MPIAIJ, 3102 #if defined(PETSC_HAVE_PBGL) 3103 0, 3104 #else 3105 0, 3106 #endif 3107 0, 3108 0, 3109 0, 3110 /*34*/ MatDuplicate_MPIAIJ, 3111 0, 3112 0, 3113 0, 3114 0, 3115 /*39*/ MatAXPY_MPIAIJ, 3116 MatGetSubMatrices_MPIAIJ, 3117 MatIncreaseOverlap_MPIAIJ, 3118 MatGetValues_MPIAIJ, 3119 MatCopy_MPIAIJ, 3120 /*44*/ MatGetRowMax_MPIAIJ, 3121 MatScale_MPIAIJ, 3122 0, 3123 MatDiagonalSet_MPIAIJ, 3124 MatZeroRowsColumns_MPIAIJ, 3125 /*49*/ MatSetRandom_MPIAIJ, 3126 0, 3127 0, 3128 0, 3129 0, 3130 /*54*/ MatFDColoringCreate_MPIXAIJ, 3131 0, 3132 MatSetUnfactored_MPIAIJ, 3133 MatPermute_MPIAIJ, 3134 0, 3135 /*59*/ MatGetSubMatrix_MPIAIJ, 3136 MatDestroy_MPIAIJ, 3137 MatView_MPIAIJ, 3138 0, 3139 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3140 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3141 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3142 0, 3143 0, 3144 0, 3145 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3146 MatGetRowMinAbs_MPIAIJ, 3147 0, 3148 MatSetColoring_MPIAIJ, 3149 0, 3150 MatSetValuesAdifor_MPIAIJ, 3151 /*75*/ MatFDColoringApply_AIJ, 3152 0, 3153 0, 3154 0, 3155 MatFindZeroDiagonals_MPIAIJ, 3156 /*80*/ 0, 3157 0, 3158 0, 3159 /*83*/ MatLoad_MPIAIJ, 3160 0, 3161 0, 3162 0, 3163 0, 3164 0, 3165 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3166 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3167 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3168 MatPtAP_MPIAIJ_MPIAIJ, 3169 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3170 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3171 0, 3172 0, 3173 0, 3174 0, 3175 /*99*/ 0, 3176 0, 3177 0, 3178 MatConjugate_MPIAIJ, 3179 0, 3180 /*104*/MatSetValuesRow_MPIAIJ, 3181 MatRealPart_MPIAIJ, 3182 MatImaginaryPart_MPIAIJ, 3183 0, 3184 0, 3185 /*109*/0, 3186 MatGetRedundantMatrix_MPIAIJ, 3187 MatGetRowMin_MPIAIJ, 3188 0, 3189 0, 3190 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3191 0, 3192 0, 3193 0, 3194 0, 3195 /*119*/0, 3196 0, 3197 0, 3198 0, 3199 MatGetMultiProcBlock_MPIAIJ, 3200 /*124*/MatFindNonzeroRows_MPIAIJ, 3201 MatGetColumnNorms_MPIAIJ, 3202 MatInvertBlockDiagonal_MPIAIJ, 3203 0, 3204 MatGetSubMatricesParallel_MPIAIJ, 3205 /*129*/0, 3206 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3207 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3208 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3209 0, 3210 /*134*/0, 3211 0, 3212 0, 3213 0, 3214 0, 3215 /*139*/0, 3216 0, 3217 0, 3218 MatFDColoringSetUp_MPIXAIJ 3219 }; 3220 3221 /* ----------------------------------------------------------------------------------------*/ 3222 3223 #undef __FUNCT__ 3224 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3225 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3226 { 3227 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3228 PetscErrorCode ierr; 3229 3230 PetscFunctionBegin; 3231 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3232 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3233 PetscFunctionReturn(0); 3234 } 3235 3236 #undef __FUNCT__ 3237 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3238 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3239 { 3240 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3241 PetscErrorCode ierr; 3242 3243 PetscFunctionBegin; 3244 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3245 
ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3246 PetscFunctionReturn(0); 3247 } 3248 3249 #undef __FUNCT__ 3250 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3251 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3252 { 3253 Mat_MPIAIJ *b; 3254 PetscErrorCode ierr; 3255 3256 PetscFunctionBegin; 3257 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3258 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3259 b = (Mat_MPIAIJ*)B->data; 3260 3261 if (!B->preallocated) { 3262 /* Explicitly create 2 MATSEQAIJ matrices. */ 3263 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3264 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3265 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3266 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3267 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3268 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3269 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3270 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3271 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3272 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3273 } 3274 3275 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3276 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3277 B->preallocated = PETSC_TRUE; 3278 PetscFunctionReturn(0); 3279 } 3280 3281 #undef __FUNCT__ 3282 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3283 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3284 { 3285 Mat mat; 3286 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3287 PetscErrorCode ierr; 3288 3289 PetscFunctionBegin; 3290 *newmat = 0; 3291 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3292 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3293 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3294 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3295 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3296 a = (Mat_MPIAIJ*)mat->data; 3297 3298 mat->factortype = matin->factortype; 3299 mat->assembled = PETSC_TRUE; 3300 mat->insertmode = NOT_SET_VALUES; 3301 mat->preallocated = PETSC_TRUE; 3302 3303 a->size = oldmat->size; 3304 a->rank = oldmat->rank; 3305 a->donotstash = oldmat->donotstash; 3306 a->roworiented = oldmat->roworiented; 3307 a->rowindices = 0; 3308 a->rowvalues = 0; 3309 a->getrowactive = PETSC_FALSE; 3310 3311 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3312 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3313 3314 if (oldmat->colmap) { 3315 #if defined(PETSC_USE_CTABLE) 3316 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3317 #else 3318 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3319 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3320 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3321 #endif 3322 } else a->colmap = 0; 3323 if (oldmat->garray) { 3324 PetscInt len; 3325 len = oldmat->B->cmap->n; 3326 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3327 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3328 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3329 } else a->garray = 0; 3330 3331 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3332 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3333 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3334 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3335 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3336 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3337 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3338 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3339 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3340 *newmat = mat; 3341 PetscFunctionReturn(0); 3342 } 3343 3344 3345 3346 #undef __FUNCT__ 3347 #define __FUNCT__ "MatLoad_MPIAIJ" 3348 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3349 { 3350 PetscScalar *vals,*svals; 3351 MPI_Comm comm; 3352 PetscErrorCode ierr; 3353 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3354 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3355 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3356 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3357 PetscInt cend,cstart,n,*rowners,sizesset=1; 3358 int fd; 3359 PetscInt bs = 1; 3360 3361 PetscFunctionBegin; 3362 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3363 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3364 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3365 if (!rank) { 3366 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3367 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3368 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3369 } 3370 3371 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3372 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3373 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3374 3375 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3376 3377 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3378 M = header[1]; N = header[2]; 3379 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3380 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3381 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3382 3383 /* If global sizes are set, check if they are consistent with that given in the file */ 3384 if (sizesset) { 3385 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3386 } 3387 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3388 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3389 3390 /* determine ownership of all (block) rows */ 3391 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3392 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3393 else m = newMat->rmap->n; /* Set by user */ 3394 3395 
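  /*
     Each process contributes its local row count m; an MPI_Allgather() followed by
     a running sum turns rowners[] into the global row ranges, so that rows
     rowners[rank] .. rowners[rank+1]-1 belong to this process.  Rank 0 later uses
     these ranges to read the row lengths, column indices and values from the file
     and ship each process its portion.
  */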
ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3396 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3397 3398 /* First process needs enough room for process with most rows */ 3399 if (!rank) { 3400 mmax = rowners[1]; 3401 for (i=2; i<=size; i++) { 3402 mmax = PetscMax(mmax, rowners[i]); 3403 } 3404 } else mmax = -1; /* unused, but compilers complain */ 3405 3406 rowners[0] = 0; 3407 for (i=2; i<=size; i++) { 3408 rowners[i] += rowners[i-1]; 3409 } 3410 rstart = rowners[rank]; 3411 rend = rowners[rank+1]; 3412 3413 /* distribute row lengths to all processors */ 3414 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3415 if (!rank) { 3416 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3417 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3418 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3419 for (j=0; j<m; j++) { 3420 procsnz[0] += ourlens[j]; 3421 } 3422 for (i=1; i<size; i++) { 3423 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3424 /* calculate the number of nonzeros on each processor */ 3425 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3426 procsnz[i] += rowlengths[j]; 3427 } 3428 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3429 } 3430 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3431 } else { 3432 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3433 } 3434 3435 if (!rank) { 3436 /* determine max buffer needed and allocate it */ 3437 maxnz = 0; 3438 for (i=0; i<size; i++) { 3439 maxnz = PetscMax(maxnz,procsnz[i]); 3440 } 3441 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3442 3443 /* read in my part of the matrix column indices */ 3444 nz = procsnz[0]; 3445 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3446 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3447 3448 /* read in every one elses and ship off */ 3449 for (i=1; i<size; i++) { 3450 nz = procsnz[i]; 3451 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3452 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3453 } 3454 ierr = PetscFree(cols);CHKERRQ(ierr); 3455 } else { 3456 /* determine buffer space needed for message */ 3457 nz = 0; 3458 for (i=0; i<m; i++) { 3459 nz += ourlens[i]; 3460 } 3461 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3462 3463 /* receive message of column indices*/ 3464 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3465 } 3466 3467 /* determine column ownership if matrix is not square */ 3468 if (N != M) { 3469 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3470 else n = newMat->cmap->n; 3471 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3472 cstart = cend - n; 3473 } else { 3474 cstart = rstart; 3475 cend = rend; 3476 n = cend - cstart; 3477 } 3478 3479 /* loop over local rows, determining number of off diagonal entries */ 3480 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3481 jj = 0; 3482 for (i=0; i<m; i++) { 3483 for (j=0; j<ourlens[i]; j++) { 3484 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3485 jj++; 3486 } 3487 } 3488 3489 for (i=0; i<m; i++) { 3490 ourlens[i] -= offlens[i]; 3491 } 3492 if (!sizesset) { 3493 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3494 } 3495 3496 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3497 3498 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3499 3500 for (i=0; i<m; i++) { 3501 ourlens[i] += offlens[i]; 3502 } 3503 3504 if 
(!rank) { 3505 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3506 3507 /* read in my part of the matrix numerical values */ 3508 nz = procsnz[0]; 3509 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3510 3511 /* insert into matrix */ 3512 jj = rstart; 3513 smycols = mycols; 3514 svals = vals; 3515 for (i=0; i<m; i++) { 3516 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3517 smycols += ourlens[i]; 3518 svals += ourlens[i]; 3519 jj++; 3520 } 3521 3522 /* read in other processors and ship out */ 3523 for (i=1; i<size; i++) { 3524 nz = procsnz[i]; 3525 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3526 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3527 } 3528 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3529 } else { 3530 /* receive numeric values */ 3531 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3532 3533 /* receive message of values*/ 3534 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3535 3536 /* insert into matrix */ 3537 jj = rstart; 3538 smycols = mycols; 3539 svals = vals; 3540 for (i=0; i<m; i++) { 3541 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3542 smycols += ourlens[i]; 3543 svals += ourlens[i]; 3544 jj++; 3545 } 3546 } 3547 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3548 ierr = PetscFree(vals);CHKERRQ(ierr); 3549 ierr = PetscFree(mycols);CHKERRQ(ierr); 3550 ierr = PetscFree(rowners);CHKERRQ(ierr); 3551 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3552 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3553 PetscFunctionReturn(0); 3554 } 3555 3556 #undef __FUNCT__ 3557 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3558 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3559 { 3560 PetscErrorCode ierr; 3561 IS iscol_local; 3562 PetscInt csize; 3563 3564 PetscFunctionBegin; 3565 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3566 if (call == MAT_REUSE_MATRIX) { 3567 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3568 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3569 } else { 3570 PetscInt cbs; 3571 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3572 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3573 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3574 } 3575 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3576 if (call == MAT_INITIAL_MATRIX) { 3577 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3578 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3579 } 3580 PetscFunctionReturn(0); 3581 } 3582 3583 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3584 #undef __FUNCT__ 3585 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3586 /* 3587 Not great since it makes two copies of the submatrix, first an SeqAIJ 3588 in local and then by concatenating the local matrices the end result. 3589 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3590 3591 Note: This requires a sequential iscol with all indices. 
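          (MatGetSubMatrix_MPIAIJ() builds such an iscol by gathering the distributed
          column IS with ISAllGather() and caches it on the result under the name
          "ISAllGather" so that MAT_REUSE_MATRIX calls can find it again.)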
3592 */ 3593 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3594 { 3595 PetscErrorCode ierr; 3596 PetscMPIInt rank,size; 3597 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3598 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3599 PetscBool allcolumns, colflag; 3600 Mat M,Mreuse; 3601 MatScalar *vwork,*aa; 3602 MPI_Comm comm; 3603 Mat_SeqAIJ *aij; 3604 3605 PetscFunctionBegin; 3606 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3607 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3608 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3609 3610 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3611 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3612 if (colflag && ncol == mat->cmap->N) { 3613 allcolumns = PETSC_TRUE; 3614 } else { 3615 allcolumns = PETSC_FALSE; 3616 } 3617 if (call == MAT_REUSE_MATRIX) { 3618 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3619 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3620 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3621 } else { 3622 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3623 } 3624 3625 /* 3626 m - number of local rows 3627 n - number of columns (same on all processors) 3628 rstart - first row in new global matrix generated 3629 */ 3630 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3631 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3632 if (call == MAT_INITIAL_MATRIX) { 3633 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3634 ii = aij->i; 3635 jj = aij->j; 3636 3637 /* 3638 Determine the number of non-zeros in the diagonal and off-diagonal 3639 portions of the matrix in order to do correct preallocation 3640 */ 3641 3642 /* first get start and end of "diagonal" columns */ 3643 if (csize == PETSC_DECIDE) { 3644 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3645 if (mglobal == n) { /* square matrix */ 3646 nlocal = m; 3647 } else { 3648 nlocal = n/size + ((n % size) > rank); 3649 } 3650 } else { 3651 nlocal = csize; 3652 } 3653 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3654 rstart = rend - nlocal; 3655 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3656 3657 /* next, compute all the lengths */ 3658 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3659 olens = dlens + m; 3660 for (i=0; i<m; i++) { 3661 jend = ii[i+1] - ii[i]; 3662 olen = 0; 3663 dlen = 0; 3664 for (j=0; j<jend; j++) { 3665 if (*jj < rstart || *jj >= rend) olen++; 3666 else dlen++; 3667 jj++; 3668 } 3669 olens[i] = olen; 3670 dlens[i] = dlen; 3671 } 3672 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3673 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3674 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3675 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3676 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3677 ierr = PetscFree(dlens);CHKERRQ(ierr); 3678 } else { 3679 PetscInt ml,nl; 3680 3681 M = *newmat; 3682 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3683 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3684 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3685 /* 3686 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3772 } 3773 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3774 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3775 3776 if (!v) { 3777 ierr = PetscFree(values);CHKERRQ(ierr); 3778 } 3779 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3780 PetscFunctionReturn(0); 3781 } 3782 3783 #undef __FUNCT__ 3784 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3785 /*@ 3786 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3787 (the default parallel PETSc format). 3788 3789 Collective on MPI_Comm 3790 3791 Input Parameters: 3792 + B - the matrix 3793 . i - the indices into j for the start of each local row (starts with zero) 3794 . j - the column indices for each local row (starts with zero) 3795 - v - optional values in the matrix 3796 3797 Level: developer 3798 3799 Notes: 3800 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3801 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3802 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3803 3804 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3805 3806 The format which is used for the sparse matrix input, is equivalent to a 3807 row-major ordering.. i.e for the following matrix, the input data expected is 3808 as shown: 3809 3810 1 0 0 3811 2 0 3 P0 3812 ------- 3813 4 5 6 P1 3814 3815 Process0 [P0]: rows_owned=[0,1] 3816 i = {0,1,3} [size = nrow+1 = 2+1] 3817 j = {0,0,2} [size = nz = 6] 3818 v = {1,2,3} [size = nz = 6] 3819 3820 Process1 [P1]: rows_owned=[2] 3821 i = {0,3} [size = nrow+1 = 1+1] 3822 j = {0,1,2} [size = nz = 6] 3823 v = {4,5,6} [size = nz = 6] 3824 3825 .keywords: matrix, aij, compressed row, sparse, parallel 3826 3827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3828 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3829 @*/ 3830 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3831 { 3832 PetscErrorCode ierr; 3833 3834 PetscFunctionBegin; 3835 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3836 PetscFunctionReturn(0); 3837 } 3838 3839 #undef __FUNCT__ 3840 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3841 /*@C 3842 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3843 (the default parallel PETSc format). For good matrix assembly performance 3844 the user should preallocate the matrix storage by setting the parameters 3845 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3846 performance can be increased by more than a factor of 50. 3847 3848 Collective on MPI_Comm 3849 3850 Input Parameters: 3851 + B - the matrix 3852 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3853 (same value is used for all local rows) 3854 . d_nnz - array containing the number of nonzeros in the various rows of the 3855 DIAGONAL portion of the local submatrix (possibly different for each row) 3856 or NULL, if d_nz is used to specify the nonzero structure. 3857 The size of this array is equal to the number of local rows, i.e 'm'. 
3858 For matrices that will be factored, you must leave room for (and set) 3859 the diagonal entry even if it is zero. 3860 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3861 submatrix (same value is used for all local rows). 3862 - o_nnz - array containing the number of nonzeros in the various rows of the 3863 OFF-DIAGONAL portion of the local submatrix (possibly different for 3864 each row) or NULL, if o_nz is used to specify the nonzero 3865 structure. The size of this array is equal to the number 3866 of local rows, i.e 'm'. 3867 3868 If the *_nnz parameter is given then the *_nz parameter is ignored 3869 3870 The AIJ format (also called the Yale sparse matrix format or 3871 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3872 storage. The stored row and column indices begin with zero. 3873 See Users-Manual: ch_mat for details. 3874 3875 The parallel matrix is partitioned such that the first m0 rows belong to 3876 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3877 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3878 3879 The DIAGONAL portion of the local submatrix of a processor can be defined 3880 as the submatrix which is obtained by extraction the part corresponding to 3881 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3882 first row that belongs to the processor, r2 is the last row belonging to 3883 the this processor, and c1-c2 is range of indices of the local part of a 3884 vector suitable for applying the matrix to. This is an mxn matrix. In the 3885 common case of a square matrix, the row and column ranges are the same and 3886 the DIAGONAL part is also square. The remaining portion of the local 3887 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3888 3889 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3890 3891 You can call MatGetInfo() to get information on how effective the preallocation was; 3892 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3893 You can also run with the option -info and look for messages with the string 3894 malloc in them to see if additional memory allocation was needed. 3895 3896 Example usage: 3897 3898 Consider the following 8x8 matrix with 34 non-zero values, that is 3899 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3900 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3901 as follows: 3902 3903 .vb 3904 1 2 0 | 0 3 0 | 0 4 3905 Proc0 0 5 6 | 7 0 0 | 8 0 3906 9 0 10 | 11 0 0 | 12 0 3907 ------------------------------------- 3908 13 0 14 | 15 16 17 | 0 0 3909 Proc1 0 18 0 | 19 20 21 | 0 0 3910 0 0 0 | 22 23 0 | 24 0 3911 ------------------------------------- 3912 Proc2 25 26 27 | 0 0 28 | 29 0 3913 30 0 0 | 31 32 33 | 0 34 3914 .ve 3915 3916 This can be represented as a collection of submatrices as: 3917 3918 .vb 3919 A B C 3920 D E F 3921 G H I 3922 .ve 3923 3924 Where the submatrices A,B,C are owned by proc0, D,E,F are 3925 owned by proc1, G,H,I are owned by proc2. 3926 3927 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3928 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3929 The 'M','N' parameters are 8,8, and have the same values on all procs. 3930 3931 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3932 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3933 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.
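       For example, the first of the two processes in the layout shown below (2 local
     rows, 3 global columns) would, as a sketch, call
.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve
     while the second process passes its own one-row i, j, and a arrays.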
4014 4015 The format which is used for the sparse matrix input, is equivalent to a 4016 row-major ordering.. i.e for the following matrix, the input data expected is 4017 as shown: 4018 4019 1 0 0 4020 2 0 3 P0 4021 ------- 4022 4 5 6 P1 4023 4024 Process0 [P0]: rows_owned=[0,1] 4025 i = {0,1,3} [size = nrow+1 = 2+1] 4026 j = {0,0,2} [size = nz = 6] 4027 v = {1,2,3} [size = nz = 6] 4028 4029 Process1 [P1]: rows_owned=[2] 4030 i = {0,3} [size = nrow+1 = 1+1] 4031 j = {0,1,2} [size = nz = 6] 4032 v = {4,5,6} [size = nz = 6] 4033 4034 .keywords: matrix, aij, compressed row, sparse, parallel 4035 4036 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4037 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4038 @*/ 4039 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4040 { 4041 PetscErrorCode ierr; 4042 4043 PetscFunctionBegin; 4044 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4045 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4046 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4047 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4048 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4049 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4050 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4051 PetscFunctionReturn(0); 4052 } 4053 4054 #undef __FUNCT__ 4055 #define __FUNCT__ "MatCreateAIJ" 4056 /*@C 4057 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4058 (the default parallel PETSc format). For good matrix assembly performance 4059 the user should preallocate the matrix storage by setting the parameters 4060 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4061 performance can be increased by more than a factor of 50. 4062 4063 Collective on MPI_Comm 4064 4065 Input Parameters: 4066 + comm - MPI communicator 4067 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4068 This value should be the same as the local size used in creating the 4069 y vector for the matrix-vector product y = Ax. 4070 . n - This value should be the same as the local size used in creating the 4071 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4072 calculated if N is given) For square matrices n is almost always m. 4073 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4074 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4075 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4076 (same value is used for all local rows) 4077 . d_nnz - array containing the number of nonzeros in the various rows of the 4078 DIAGONAL portion of the local submatrix (possibly different for each row) 4079 or NULL, if d_nz is used to specify the nonzero structure. 4080 The size of this array is equal to the number of local rows, i.e 'm'. 4081 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4082 submatrix (same value is used for all local rows). 4083 - o_nnz - array containing the number of nonzeros in the various rows of the 4084 OFF-DIAGONAL portion of the local submatrix (possibly different for 4085 each row) or NULL, if o_nz is used to specify the nonzero 4086 structure. 
The size of this array is equal to the number 4087 of local rows, i.e 'm'. 4088 4089 Output Parameter: 4090 . A - the matrix 4091 4092 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4093 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4094 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4095 4096 Notes: 4097 If the *_nnz parameter is given then the *_nz parameter is ignored 4098 4099 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4100 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4101 storage requirements for this matrix. 4102 4103 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4104 processor than it must be used on all processors that share the object for 4105 that argument. 4106 4107 The user MUST specify either the local or global matrix dimensions 4108 (possibly both). 4109 4110 The parallel matrix is partitioned across processors such that the 4111 first m0 rows belong to process 0, the next m1 rows belong to 4112 process 1, the next m2 rows belong to process 2 etc.. where 4113 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4114 values corresponding to [m x N] submatrix. 4115 4116 The columns are logically partitioned with the n0 columns belonging 4117 to 0th partition, the next n1 columns belonging to the next 4118 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4119 4120 The DIAGONAL portion of the local submatrix on any given processor 4121 is the submatrix corresponding to the rows and columns m,n 4122 corresponding to the given processor. i.e diagonal matrix on 4123 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4124 etc. The remaining portion of the local submatrix [m x (N-n)] 4125 constitute the OFF-DIAGONAL portion. The example below better 4126 illustrates this concept. 4127 4128 For a square global matrix we define each processor's diagonal portion 4129 to be its local rows and the corresponding columns (a square submatrix); 4130 each processor's off-diagonal portion encompasses the remainder of the 4131 local matrix (a rectangular submatrix). 4132 4133 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4134 4135 When calling this routine with a single process communicator, a matrix of 4136 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4137 type of communicator, use the construction mechanism: 4138 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4139 4140 By default, this format uses inodes (identical nodes) when possible. 4141 We search for consecutive rows with the same nonzero structure, thereby 4142 reusing matrix information to achieve increased efficiency. 4143 4144 Options Database Keys: 4145 + -mat_no_inode - Do not use inodes 4146 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4147 - -mat_aij_oneindex - Internally use indexing starting at 1 4148 rather than 0. Note that when calling MatSetValues(), 4149 the user still MUST index entries starting at 0! 4150 4151 4152 Example usage: 4153 4154 Consider the following 8x8 matrix with 34 non-zero values, that is 4155 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4156 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
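   A sketch of the corresponding calls on each process (with m, n and the d_nnz, o_nnz
   arrays set to the per-process values above, and ncols, cols, vals filled row by row;
   the variable names are illustrative) is:
.vb
   ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
   for (row=rstart; row<rend; row++) {
     ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
   }
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve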
4220 4221 Level: intermediate 4222 4223 .keywords: matrix, aij, compressed row, sparse, parallel 4224 4225 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4226 MPIAIJ, MatCreateMPIAIJWithArrays() 4227 @*/ 4228 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4229 { 4230 PetscErrorCode ierr; 4231 PetscMPIInt size; 4232 4233 PetscFunctionBegin; 4234 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4235 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4236 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4237 if (size > 1) { 4238 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4239 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4240 } else { 4241 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4242 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4243 } 4244 PetscFunctionReturn(0); 4245 } 4246 4247 #undef __FUNCT__ 4248 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4249 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4250 { 4251 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4252 4253 PetscFunctionBegin; 4254 if (Ad) *Ad = a->A; 4255 if (Ao) *Ao = a->B; 4256 if (colmap) *colmap = a->garray; 4257 PetscFunctionReturn(0); 4258 } 4259 4260 #undef __FUNCT__ 4261 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4262 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4263 { 4264 PetscErrorCode ierr; 4265 PetscInt i; 4266 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4267 4268 PetscFunctionBegin; 4269 if (coloring->ctype == IS_COLORING_GLOBAL) { 4270 ISColoringValue *allcolors,*colors; 4271 ISColoring ocoloring; 4272 4273 /* set coloring for diagonal portion */ 4274 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4275 4276 /* set coloring for off-diagonal portion */ 4277 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4278 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4279 for (i=0; i<a->B->cmap->n; i++) { 4280 colors[i] = allcolors[a->garray[i]]; 4281 } 4282 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4283 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4284 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4285 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4286 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4287 ISColoringValue *colors; 4288 PetscInt *larray; 4289 ISColoring ocoloring; 4290 4291 /* set coloring for diagonal portion */ 4292 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4293 for (i=0; i<a->A->cmap->n; i++) { 4294 larray[i] = i + A->cmap->rstart; 4295 } 4296 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4297 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4298 for (i=0; i<a->A->cmap->n; i++) { 4299 colors[i] = coloring->colors[larray[i]]; 4300 } 4301 ierr = PetscFree(larray);CHKERRQ(ierr); 4302 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4303 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4304 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4305 4306 /* set coloring for off-diagonal portion */ 4307 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4308 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4309 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4310 for (i=0; i<a->B->cmap->n; i++) { 4311 colors[i] = coloring->colors[larray[i]]; 4312 } 4313 ierr = PetscFree(larray);CHKERRQ(ierr); 4314 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4315 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4316 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4317 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4318 PetscFunctionReturn(0); 4319 } 4320 4321 #undef __FUNCT__ 4322 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4323 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4324 { 4325 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4326 PetscErrorCode ierr; 4327 4328 PetscFunctionBegin; 4329 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4330 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4331 PetscFunctionReturn(0); 4332 } 4333 4334 #undef __FUNCT__ 4335 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4336 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4337 { 4338 PetscErrorCode ierr; 4339 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4340 PetscInt *indx; 4341 4342 PetscFunctionBegin; 4343 /* This routine will ONLY return MPIAIJ type matrix */ 4344 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4345 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4346 if (n == PETSC_DECIDE) { 4347 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4348 } 4349 /* Check sum(n) = N */ 4350 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4351 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4352 4353 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4354 rstart -= m; 4355 4356 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4357 for (i=0; i<m; i++) { 4358 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4359 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4360 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4361 } 4362 4363 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4364 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4365 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4366 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4367 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4368 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4369 PetscFunctionReturn(0); 4370 } 4371 4372 #undef __FUNCT__ 4373 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4374 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4375 { 4376 PetscErrorCode ierr; 4377 PetscInt m,N,i,rstart,nnz,Ii; 4378 PetscInt *indx; 4379 PetscScalar *values; 4380 4381 PetscFunctionBegin; 4382 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4383 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4384 for (i=0; i<m; i++) { 4385 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4386 Ii = i + rstart; 4387 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4388 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4389 } 4390 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4391 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4392 PetscFunctionReturn(0); 4393 } 4394 4395 #undef __FUNCT__ 4396 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4397 /*@ 4398 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4399 matrices from each processor 4400 4401 Collective on MPI_Comm 4402 4403 Input Parameters: 4404 + comm - the communicators the parallel matrix will live on 4405 . inmat - the input sequential matrices 4406 . n - number of local columns (or PETSC_DECIDE) 4407 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4408 4409 Output Parameter: 4410 . outmat - the parallel matrix generated 4411 4412 Level: advanced 4413 4414 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4415 4416 @*/ 4417 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4418 { 4419 PetscErrorCode ierr; 4420 PetscMPIInt size; 4421 4422 PetscFunctionBegin; 4423 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4424 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4425 if (size == 1) { 4426 if (scall == MAT_INITIAL_MATRIX) { 4427 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4428 } else { 4429 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4430 } 4431 } else { 4432 if (scall == MAT_INITIAL_MATRIX) { 4433 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4434 } 4435 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4436 } 4437 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4438 PetscFunctionReturn(0); 4439 } 4440 4441 #undef __FUNCT__ 4442 #define __FUNCT__ "MatFileSplit" 4443 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4444 { 4445 PetscErrorCode ierr; 4446 PetscMPIInt rank; 4447 PetscInt m,N,i,rstart,nnz; 4448 size_t len; 4449 const PetscInt *indx; 4450 PetscViewer out; 4451 char *name; 4452 Mat B; 4453 const PetscScalar *values; 4454 4455 PetscFunctionBegin; 4456 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4457 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4458 /* Should this be the type of the diagonal block of A? 
*/ 4459 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4460 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4461 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4462 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4463 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4464 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4465 for (i=0; i<m; i++) { 4466 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4467 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4468 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4469 } 4470 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4471 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4472 4473 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4474 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4475 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4476 sprintf(name,"%s.%d",outfile,rank); 4477 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4478 ierr = PetscFree(name);CHKERRQ(ierr); 4479 ierr = MatView(B,out);CHKERRQ(ierr); 4480 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4481 ierr = MatDestroy(&B);CHKERRQ(ierr); 4482 PetscFunctionReturn(0); 4483 } 4484 4485 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4486 #undef __FUNCT__ 4487 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4488 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4489 { 4490 PetscErrorCode ierr; 4491 Mat_Merge_SeqsToMPI *merge; 4492 PetscContainer container; 4493 4494 PetscFunctionBegin; 4495 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4496 if (container) { 4497 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4505 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4507 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4508 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4509 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4510 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4511 ierr = PetscFree(merge);CHKERRQ(ierr); 4512 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4513 } 4514 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4515 PetscFunctionReturn(0); 4516 } 4517 4518 #include <../src/mat/utils/freespace.h> 4519 #include <petscbt.h> 4520 4521 #undef __FUNCT__ 4522 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4523 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4524 { 4525 PetscErrorCode ierr; 4526 MPI_Comm comm; 4527 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4528 PetscMPIInt size,rank,taga,*len_s; 4529 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4530 PetscInt proc,m; 4531 PetscInt **buf_ri,**buf_rj; 4532 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4533 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4534 MPI_Request *s_waits,*r_waits; 4535 MPI_Status *status; 4536 MatScalar *aa=a->a; 4537 MatScalar **abuf_r,*ba_i; 4538 Mat_Merge_SeqsToMPI *merge; 4539 PetscContainer container; 4540 4541 PetscFunctionBegin; 4542 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4543 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4544 4545 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4546 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4547 4548 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4549 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4550 4551 bi = merge->bi; 4552 bj = merge->bj; 4553 buf_ri = merge->buf_ri; 4554 buf_rj = merge->buf_rj; 4555 4556 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4557 owners = merge->rowmap->range; 4558 len_s = merge->len_s; 4559 4560 /* send and recv matrix values */ 4561 /*-----------------------------*/ 4562 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4563 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4564 4565 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4566 for (proc=0,k=0; proc<size; proc++) { 4567 if (!len_s[proc]) continue; 4568 i = owners[proc]; 4569 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4570 k++; 4571 } 4572 4573 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4574 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4575 ierr = PetscFree(status);CHKERRQ(ierr); 4576 4577 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4578 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4579 4580 /* insert mat values of mpimat */ 4581 /*----------------------------*/ 4582 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4583 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4584 4585 for (k=0; k<merge->nrecv; k++) { 4586 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4587 nrows = *(buf_ri_k[k]); 4588 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4589 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4590 } 4591 4592 /* set values of ba */ 4593 m = merge->rowmap->n; 4594 for (i=0; i<m; i++) { 4595 arow = owners[rank] + i; 4596 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4597 bnzi = bi[i+1] - bi[i]; 4598 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4599 4600 /* add local non-zero vals of this proc's seqmat into ba */ 4601 anzi = ai[arow+1] - ai[arow]; 4602 aj = a->j + ai[arow]; 4603 aa = a->a + ai[arow]; 4604 nextaj = 0; 4605 for (j=0; nextaj<anzi; j++) { 4606 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4607 ba_i[j] += aa[nextaj++]; 4608 } 4609 } 4610 4611 /* add received vals into ba */ 4612 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4613 /* i-th row */ 4614 if (i == *nextrow[k]) { 4615 anzi = *(nextai[k]+1) - *nextai[k]; 4616 aj = buf_rj[k] + *(nextai[k]); 4617 aa = abuf_r[k] + *(nextai[k]); 4618 nextaj = 0; 4619 for (j=0; nextaj<anzi; j++) { 4620 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4621 ba_i[j] += aa[nextaj++]; 4622 } 4623 } 4624 nextrow[k]++; nextai[k]++; 4625 } 4626 } 4627 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4628 } 4629 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4630 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4631 4632 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4633 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4634 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4635 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4636 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4637 PetscFunctionReturn(0); 4638 } 4639 4640 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4641 4642 #undef __FUNCT__ 4643 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4644 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4645 { 4646 PetscErrorCode ierr; 4647 Mat B_mpi; 4648 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4649 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4650 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4651 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4652 PetscInt len,proc,*dnz,*onz,bs,cbs; 4653 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4654 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4655 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4656 MPI_Status *status; 4657 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4658 PetscBT lnkbt; 4659 Mat_Merge_SeqsToMPI *merge; 4660 PetscContainer container; 4661 4662 PetscFunctionBegin; 4663 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4664 4665 /* make sure it is a PETSc comm */ 4666 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4667 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4668 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4669 4670 ierr = PetscNew(&merge);CHKERRQ(ierr); 4671 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4672 4673 /* determine row ownership */ 4674 /*---------------------------------------------------------*/ 4675 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4676 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4677 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4678 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4679 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4680 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4681 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4682 4683 m = merge->rowmap->n; 4684 owners = merge->rowmap->range; 4685 4686 /* determine the number of messages to send, their lengths */ 4687 /*---------------------------------------------------------*/ 4688 len_s = merge->len_s; 4689 4690 len = 0; /* length of buf_si[] */ 4691 merge->nsend = 0; 4692 for (proc=0; proc<size; proc++) { 4693 len_si[proc] = 0; 4694 if (proc == rank) { 4695 len_s[proc] = 0; 4696 } else { 4697 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4698 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4699 } 4700 if (len_s[proc]) { 4701 merge->nsend++; 4702 nrows = 0; 4703 for (i=owners[proc]; i<owners[proc+1]; i++) { 4704 if (ai[i+1] > ai[i]) nrows++; 4705 } 4706 len_si[proc] = 2*(nrows+1); 4707 len += len_si[proc]; 4708 } 4709 } 4710 4711 /* determine the number and length of messages to receive for ij-structure */ 4712 /*-------------------------------------------------------------------------*/ 4713 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4714 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4715 4716 /* post the Irecv of j-structure */ 4717 /*-------------------------------*/ 4718 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4719 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4720 4721 /* post the Isend of j-structure */ 4722 /*--------------------------------*/ 4723 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4724 4725 for (proc=0, k=0; proc<size; proc++) { 4726 if (!len_s[proc]) continue; 4727 i = owners[proc]; 4728 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4729 k++; 4730 } 4731 4732 /* receives and sends of j-structure are complete */ 4733 /*------------------------------------------------*/ 4734 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4735 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4736 4737 /* send and recv i-structure */ 4738 /*---------------------------*/ 4739 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4740 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4741 4742 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4743 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4744 for (proc=0,k=0; proc<size; proc++) { 4745 if (!len_s[proc]) continue; 4746 /* form outgoing message for i-structure: 4747 buf_si[0]: nrows to be sent 4748 [1:nrows]: row index (global) 4749 [nrows+1:2*nrows+1]: i-structure index 4750 */ 4751 /*-------------------------------------------*/ 4752 nrows = len_si[proc]/2 - 1; 4753 buf_si_i = buf_si + nrows+1; 4754 buf_si[0] = nrows; 4755 buf_si_i[0] = 0; 4756 nrows = 0; 4757 for (i=owners[proc]; i<owners[proc+1]; i++) { 4758 anzi = ai[i+1] - ai[i]; 4759 if (anzi) { 4760 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4761 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4762 nrows++; 4763 } 4764 } 4765 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4766 k++; 4767 buf_si += len_si[proc]; 4768 } 4769 4770 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4771 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4772 4773 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4774 for (i=0; i<merge->nrecv; i++) { 4775 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4776 } 4777 4778 ierr = PetscFree(len_si);CHKERRQ(ierr); 4779 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4780 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4781 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4782 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4783 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4784 ierr = PetscFree(status);CHKERRQ(ierr); 4785 4786 /* compute a local seq matrix in each processor */ 4787 /*----------------------------------------------*/ 4788 /* allocate bi array and free space for accumulating nonzero column info */ 4789 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4790 bi[0] = 0; 4791 4792 /* create and initialize a linked list */ 4793 nlnk = N+1; 4794 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4795 4796 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4797 len = ai[owners[rank+1]] - ai[owners[rank]]; 4798 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4799 4800 current_space = free_space; 4801 4802 /* determine symbolic info for each local row */ 4803 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4804
4805   for (k=0; k<merge->nrecv; k++) {
4806     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4807     nrows       = *buf_ri_k[k];
4808     nextrow[k]  = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4809     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4810   }
4811
4812   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4813   len  = 0;
4814   for (i=0; i<m; i++) {
4815     bnzi = 0;
4816     /* add local non-zero cols of this proc's seqmat into lnk */
4817     arow = owners[rank] + i;
4818     anzi = ai[arow+1] - ai[arow];
4819     aj   = a->j + ai[arow];
4820     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4821     bnzi += nlnk;
4822     /* add received col data into lnk */
4823     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4824       if (i == *nextrow[k]) { /* i-th row */
4825         anzi = *(nextai[k]+1) - *nextai[k];
4826         aj   = buf_rj[k] + *nextai[k];
4827         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4828         bnzi += nlnk;
4829         nextrow[k]++; nextai[k]++;
4830       }
4831     }
4832     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4833
4834     /* if free space is not available, make more free space */
4835     if (current_space->local_remaining<bnzi) {
4836       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4837       nspacedouble++;
4838     }
4839     /* copy data into free space, then initialize lnk */
4840     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4841     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4842
4843     current_space->array           += bnzi;
4844     current_space->local_used      += bnzi;
4845     current_space->local_remaining -= bnzi;
4846
4847     bi[i+1] = bi[i] + bnzi;
4848   }
4849
4850   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4851
4852   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4853   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4854   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4855
4856   /* create symbolic parallel matrix B_mpi */
4857   /*---------------------------------------*/
4858   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4859   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4860   if (n==PETSC_DECIDE) {
4861     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4862   } else {
4863     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4864   }
4865   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4866   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4867   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4868   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4869   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4870
4871   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4872   B_mpi->assembled    = PETSC_FALSE;
4873   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4874   merge->bi           = bi;
4875   merge->bj           = bj;
4876   merge->buf_ri       = buf_ri;
4877   merge->buf_rj       = buf_rj;
4878   merge->coi          = NULL;
4879   merge->coj          = NULL;
4880   merge->owners_co    = NULL;
4881
4882   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4883
4884   /* attach the supporting struct to B_mpi for reuse */
4885   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4886   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4887   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4888   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4889   *mpimat = B_mpi;
4890
4891   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4892   PetscFunctionReturn(0);
4893 }
4894
4895 #undef __FUNCT__
4896 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4897 /*@C
4898       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4899     matrices from each processor
4900
4901    Collective on MPI_Comm
4902
4903    Input Parameters:
4904 +    comm - the communicator the parallel matrix will live on
4905 .     seqmat - the input sequential matrix on each process
4906 .     m - number of local rows (or PETSC_DECIDE)
4907 .     n - number of local columns (or PETSC_DECIDE)
4908 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4909
4910    Output Parameter:
4911 .     mpimat - the parallel matrix generated
4912
4913     Level: advanced
4914
4915    Notes:
4916      The dimensions of the sequential matrix in each processor MUST be the same.
4917      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4918    destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4919 @*/
4920 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4921 {
4922   PetscErrorCode ierr;
4923   PetscMPIInt    size;
4924
4925   PetscFunctionBegin;
4926   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4927   if (size == 1) {
4928     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4929     if (scall == MAT_INITIAL_MATRIX) {
4930       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4931     } else {
4932       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4933     }
4934     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935     PetscFunctionReturn(0);
4936   }
4937   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4938   if (scall == MAT_INITIAL_MATRIX) {
4939     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4940   }
4941   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4942   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4943   PetscFunctionReturn(0);
4944 }
4945
4946 #undef __FUNCT__
4947 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4948 /*@
4949      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4950           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4951         with MatGetSize()
4952
4953     Not Collective
4954
4955    Input Parameters:
4956 +    A - the matrix
4957 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4958
4959    Output Parameter:
4960 .
A_loc - the local sequential matrix generated 4961 4962 Level: developer 4963 4964 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4965 4966 @*/ 4967 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4968 { 4969 PetscErrorCode ierr; 4970 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4971 Mat_SeqAIJ *mat,*a,*b; 4972 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4973 MatScalar *aa,*ba,*cam; 4974 PetscScalar *ca; 4975 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4976 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4977 PetscBool match; 4978 MPI_Comm comm; 4979 PetscMPIInt size; 4980 4981 PetscFunctionBegin; 4982 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4983 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4984 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4985 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4986 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4987 4988 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4989 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4990 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4991 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4992 aa = a->a; ba = b->a; 4993 if (scall == MAT_INITIAL_MATRIX) { 4994 if (size == 1) { 4995 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4996 PetscFunctionReturn(0); 4997 } 4998 4999 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 5000 ci[0] = 0; 5001 for (i=0; i<am; i++) { 5002 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5003 } 5004 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 5005 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 5006 k = 0; 5007 for (i=0; i<am; i++) { 5008 ncols_o = bi[i+1] - bi[i]; 5009 ncols_d = ai[i+1] - ai[i]; 5010 /* off-diagonal portion of A */ 5011 for (jo=0; jo<ncols_o; jo++) { 5012 col = cmap[*bj]; 5013 if (col >= cstart) break; 5014 cj[k] = col; bj++; 5015 ca[k++] = *ba++; 5016 } 5017 /* diagonal portion of A */ 5018 for (j=0; j<ncols_d; j++) { 5019 cj[k] = cstart + *aj++; 5020 ca[k++] = *aa++; 5021 } 5022 /* off-diagonal portion of A */ 5023 for (j=jo; j<ncols_o; j++) { 5024 cj[k] = cmap[*bj++]; 5025 ca[k++] = *ba++; 5026 } 5027 } 5028 /* put together the new matrix */ 5029 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5030 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5031 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5032 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5033 mat->free_a = PETSC_TRUE; 5034 mat->free_ij = PETSC_TRUE; 5035 mat->nonew = 0; 5036 } else if (scall == MAT_REUSE_MATRIX) { 5037 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5038 ci = mat->i; cj = mat->j; cam = mat->a; 5039 for (i=0; i<am; i++) { 5040 /* off-diagonal portion of A */ 5041 ncols_o = bi[i+1] - bi[i]; 5042 for (jo=0; jo<ncols_o; jo++) { 5043 col = cmap[*bj]; 5044 if (col >= cstart) break; 5045 *cam++ = *ba++; bj++; 5046 } 5047 /* diagonal portion of A */ 5048 ncols_d = ai[i+1] - ai[i]; 5049 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5050 /* off-diagonal portion of A */ 5051 for (j=jo; j<ncols_o; j++) { 5052 *cam++ = *ba++; bj++; 5053 } 5054 } 5055 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5056 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5057 PetscFunctionReturn(0); 5058 } 5059 5060 #undef __FUNCT__ 5061 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5062 /*@C 5063 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5064 5065 Not Collective 5066 5067 Input Parameters: 5068 + A - the matrix 5069 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5070 - row, col - index sets of rows and columns to extract (or NULL) 5071 5072 Output Parameter: 5073 . A_loc - the local sequential matrix generated 5074 5075 Level: developer 5076 5077 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5078 5079 @*/ 5080 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5081 { 5082 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5083 PetscErrorCode ierr; 5084 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5085 IS isrowa,iscola; 5086 Mat *aloc; 5087 PetscBool match; 5088 5089 PetscFunctionBegin; 5090 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5091 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5092 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5093 if (!row) { 5094 start = A->rmap->rstart; end = A->rmap->rend; 5095 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5096 } else { 5097 isrowa = *row; 5098 } 5099 if (!col) { 5100 start = A->cmap->rstart; 5101 cmap = a->garray; 5102 nzA = a->A->cmap->n; 5103 nzB = a->B->cmap->n; 5104 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5105 ncols = 0; 5106 for (i=0; i<nzB; i++) { 5107 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5108 else break; 5109 } 5110 imark = i; 5111 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5112 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5113 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5114 } else { 5115 iscola = *col; 5116 } 5117 if (scall != MAT_INITIAL_MATRIX) { 5118 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5119 aloc[0] = *A_loc; 5120 } 5121 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5122 *A_loc = aloc[0]; 5123 ierr = PetscFree(aloc);CHKERRQ(ierr); 5124 if (!row) { 5125 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5126 } 5127 if (!col) { 5128 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5129 } 5130 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5131 PetscFunctionReturn(0); 5132 } 5133 5134 #undef __FUNCT__ 5135 #define __FUNCT__ "MatGetBrowsOfAcols" 5136 /*@C 5137 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 5138 5139 Collective on Mat 5140 5141 Input Parameters: 5142 + A,B - the matrices in mpiaij format 5143 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5144 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5145 5146 Output Parameter: 5147 + rowb, colb - index sets of rows and columns of B to extract 5148 - B_seq - the sequential matrix generated 5149 5150 Level: developer 5151 5152 @*/ 5153 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5154 { 5155 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5156 PetscErrorCode ierr; 5157 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5158 IS isrowb,iscolb; 5159 Mat *bseq=NULL; 5160 5161 PetscFunctionBegin; 5162 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5163 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5164 } 5165 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5166 5167 if (scall == MAT_INITIAL_MATRIX) { 5168 start = A->cmap->rstart; 5169 cmap = a->garray; 5170 nzA = a->A->cmap->n; 5171 nzB = a->B->cmap->n; 5172 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5173 ncols = 0; 5174 for (i=0; i<nzB; i++) { /* row < local row index */ 5175 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5176 else break; 5177 } 5178 imark = i; 5179 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5180 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5181 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5182 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5183 } else { 5184 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5185 isrowb = *rowb; iscolb = *colb; 5186 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5187 bseq[0] = *B_seq; 5188 } 5189 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5190 *B_seq = bseq[0]; 5191 ierr = PetscFree(bseq);CHKERRQ(ierr); 5192 if (!rowb) { 5193 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5194 } else { 5195 *rowb = isrowb; 5196 } 5197 if (!colb) { 5198 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5199 } else { 5200 *colb = iscolb; 5201 } 5202 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5203 PetscFunctionReturn(0); 5204 } 5205 5206 #undef __FUNCT__ 5207 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5208 /* 5209 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5210 of the OFF-DIAGONAL portion of local A 5211 5212 Collective on Mat 5213 5214 Input Parameters: 5215 + A,B - the matrices in mpiaij format 5216 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5217 5218 Output Parameter: 5219 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5220 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5221 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5222 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5223 5224 Level: developer 5225 5226 */ 5227 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5228 { 5229 VecScatter_MPI_General *gen_to,*gen_from; 5230 PetscErrorCode ierr; 5231 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5232 Mat_SeqAIJ *b_oth; 5233 VecScatter ctx =a->Mvctx; 5234 MPI_Comm comm; 5235 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5236 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5237 PetscScalar *rvalues,*svalues; 5238 MatScalar *b_otha,*bufa,*bufA; 5239 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5240 MPI_Request *rwaits = NULL,*swaits = NULL; 5241 MPI_Status *sstatus,rstatus; 5242 PetscMPIInt jj,size; 5243 PetscInt *cols,sbs,rbs; 5244 PetscScalar *vals; 5245 5246 PetscFunctionBegin; 5247 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5248 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5249 if (size == 1) PetscFunctionReturn(0); 5250 5251 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5252 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5253 } 5254 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5255 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5256 5257 gen_to = (VecScatter_MPI_General*)ctx->todata; 5258 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5259 rvalues = gen_from->values; /* holds the length of receiving row */ 5260 svalues = gen_to->values; /* holds the length of sending row */ 5261 nrecvs = gen_from->n; 5262 nsends = gen_to->n; 5263 5264 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5265 srow = gen_to->indices; /* local row index to be sent */ 5266 sstarts = gen_to->starts; 5267 sprocs = gen_to->procs; 5268 sstatus = gen_to->sstatus; 5269 sbs = gen_to->bs; 5270 rstarts = gen_from->starts; 5271 rprocs = gen_from->procs; 5272 rbs = gen_from->bs; 5273 5274 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5275 if (scall == MAT_INITIAL_MATRIX) { 5276 /* i-array */ 5277 /*---------*/ 5278 /* post receives */ 5279 for (i=0; i<nrecvs; i++) { 5280 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5281 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5282 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5283 } 5284 5285 /* pack the outgoing message */ 5286 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5287 5288 sstartsj[0] = 0; 5289 rstartsj[0] = 0; 5290 len = 0; /* total length of j or a array to be sent */ 5291 k = 0; 5292 for (i=0; i<nsends; i++) { 5293 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5294 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5295 for (j=0; j<nrows; j++) { 5296 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5297 for (l=0; l<sbs; l++) { 5298 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5299 5300 rowlen[j*sbs+l] = ncols; 5301 5302 len += ncols; 5303 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5304 } 5305 k++; 5306 } 5307 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5308 5309 
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5310 } 5311 /* recvs and sends of i-array are completed */ 5312 i = nrecvs; 5313 while (i--) { 5314 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5315 } 5316 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5317 5318 /* allocate buffers for sending j and a arrays */ 5319 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5320 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5321 5322 /* create i-array of B_oth */ 5323 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5324 5325 b_othi[0] = 0; 5326 len = 0; /* total length of j or a array to be received */ 5327 k = 0; 5328 for (i=0; i<nrecvs; i++) { 5329 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5330 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5331 for (j=0; j<nrows; j++) { 5332 b_othi[k+1] = b_othi[k] + rowlen[j]; 5333 len += rowlen[j]; k++; 5334 } 5335 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5336 } 5337 5338 /* allocate space for j and a arrrays of B_oth */ 5339 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5340 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5341 5342 /* j-array */ 5343 /*---------*/ 5344 /* post receives of j-array */ 5345 for (i=0; i<nrecvs; i++) { 5346 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5347 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5348 } 5349 5350 /* pack the outgoing message j-array */ 5351 k = 0; 5352 for (i=0; i<nsends; i++) { 5353 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5354 bufJ = bufj+sstartsj[i]; 5355 for (j=0; j<nrows; j++) { 5356 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5357 for (ll=0; ll<sbs; ll++) { 5358 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5359 for (l=0; l<ncols; l++) { 5360 *bufJ++ = cols[l]; 5361 } 5362 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5363 } 5364 } 5365 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5366 } 5367 5368 /* recvs and sends of j-array are completed */ 5369 i = nrecvs; 5370 while (i--) { 5371 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5372 } 5373 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5374 } else if (scall == MAT_REUSE_MATRIX) { 5375 sstartsj = *startsj_s; 5376 rstartsj = *startsj_r; 5377 bufa = *bufa_ptr; 5378 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5379 b_otha = b_oth->a; 5380 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5381 5382 /* a-array */ 5383 /*---------*/ 5384 /* post receives of a-array */ 5385 for (i=0; i<nrecvs; i++) { 5386 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5387 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5388 } 5389 5390 /* pack the outgoing message a-array */ 5391 k = 0; 5392 for (i=0; i<nsends; i++) { 5393 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5394 bufA = bufa+sstartsj[i]; 5395 for (j=0; j<nrows; j++) { 5396 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5397 for (ll=0; ll<sbs; ll++) { 5398 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5399 for (l=0; l<ncols; l++) { 5400 *bufA++ = vals[l]; 5401 } 5402 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5403 } 5404 } 5405 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5406 } 5407 /* recvs and sends of a-array are completed */ 5408 i = nrecvs; 5409 while (i--) { 5410 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5411 } 5412 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5413 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5414 5415 if (scall == MAT_INITIAL_MATRIX) { 5416 /* put together the new matrix */ 5417 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5418 5419 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5420 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5421 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5422 b_oth->free_a = PETSC_TRUE; 5423 b_oth->free_ij = PETSC_TRUE; 5424 b_oth->nonew = 0; 5425 5426 ierr = PetscFree(bufj);CHKERRQ(ierr); 5427 if (!startsj_s || !bufa_ptr) { 5428 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5429 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5430 } else { 5431 *startsj_s = sstartsj; 5432 *startsj_r = rstartsj; 5433 *bufa_ptr = bufa; 5434 } 5435 } 5436 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5437 PetscFunctionReturn(0); 5438 } 5439 5440 #undef __FUNCT__ 5441 #define __FUNCT__ "MatGetCommunicationStructs" 5442 /*@C 5443 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5444 5445 Not Collective 5446 5447 Input Parameters: 5448 . A - The matrix in mpiaij format 5449 5450 Output Parameter: 5451 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5452 . 
colmap - A map from global column index to local index into lvec 5453 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5454 5455 Level: developer 5456 5457 @*/ 5458 #if defined(PETSC_USE_CTABLE) 5459 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5460 #else 5461 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5462 #endif 5463 { 5464 Mat_MPIAIJ *a; 5465 5466 PetscFunctionBegin; 5467 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5468 PetscValidPointer(lvec, 2); 5469 PetscValidPointer(colmap, 3); 5470 PetscValidPointer(multScatter, 4); 5471 a = (Mat_MPIAIJ*) A->data; 5472 if (lvec) *lvec = a->lvec; 5473 if (colmap) *colmap = a->colmap; 5474 if (multScatter) *multScatter = a->Mvctx; 5475 PetscFunctionReturn(0); 5476 } 5477 5478 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5479 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5480 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5481 #if defined(PETSC_HAVE_ELEMENTAL) 5482 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5483 #endif 5484 5485 #undef __FUNCT__ 5486 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5487 /* 5488 Computes (B'*A')' since computing B*A directly is untenable 5489 5490 n p p 5491 ( ) ( ) ( ) 5492 m ( A ) * n ( B ) = m ( C ) 5493 ( ) ( ) ( ) 5494 5495 */ 5496 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5497 { 5498 PetscErrorCode ierr; 5499 Mat At,Bt,Ct; 5500 5501 PetscFunctionBegin; 5502 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5503 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5504 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5505 ierr = MatDestroy(&At);CHKERRQ(ierr); 5506 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5507 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5508 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5509 PetscFunctionReturn(0); 5510 } 5511 5512 #undef __FUNCT__ 5513 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5514 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5515 { 5516 PetscErrorCode ierr; 5517 PetscInt m=A->rmap->n,n=B->cmap->n; 5518 Mat Cmat; 5519 5520 PetscFunctionBegin; 5521 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5522 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5523 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5524 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5525 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5526 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5527 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5528 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5529 5530 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5531 5532 *C = Cmat; 5533 PetscFunctionReturn(0); 5534 } 5535 5536 /* ----------------------------------------------------------------*/ 5537 #undef __FUNCT__ 5538 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5539 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5540 { 5541 PetscErrorCode ierr; 5542 5543 PetscFunctionBegin; 5544 if (scall == MAT_INITIAL_MATRIX) { 5545 ierr = 
PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5546 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5547 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5548 } 5549 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5550 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5551 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5552 PetscFunctionReturn(0); 5553 } 5554 5555 #if defined(PETSC_HAVE_MUMPS) 5556 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5557 #endif 5558 #if defined(PETSC_HAVE_PASTIX) 5559 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5560 #endif 5561 #if defined(PETSC_HAVE_SUPERLU_DIST) 5562 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5563 #endif 5564 #if defined(PETSC_HAVE_CLIQUE) 5565 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5566 #endif 5567 5568 /*MC 5569 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5570 5571 Options Database Keys: 5572 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5573 5574 Level: beginner 5575 5576 .seealso: MatCreateAIJ() 5577 M*/ 5578 5579 #undef __FUNCT__ 5580 #define __FUNCT__ "MatCreate_MPIAIJ" 5581 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5582 { 5583 Mat_MPIAIJ *b; 5584 PetscErrorCode ierr; 5585 PetscMPIInt size; 5586 5587 PetscFunctionBegin; 5588 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5589 5590 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5591 B->data = (void*)b; 5592 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5593 B->assembled = PETSC_FALSE; 5594 B->insertmode = NOT_SET_VALUES; 5595 b->size = size; 5596 5597 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5598 5599 /* build cache for off array entries formed */ 5600 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5601 5602 b->donotstash = PETSC_FALSE; 5603 b->colmap = 0; 5604 b->garray = 0; 5605 b->roworiented = PETSC_TRUE; 5606 5607 /* stuff used for matrix vector multiply */ 5608 b->lvec = NULL; 5609 b->Mvctx = NULL; 5610 5611 /* stuff for MatGetRow() */ 5612 b->rowindices = 0; 5613 b->rowvalues = 0; 5614 b->getrowactive = PETSC_FALSE; 5615 5616 /* flexible pointer used in CUSP/CUSPARSE classes */ 5617 b->spptr = NULL; 5618 5619 #if defined(PETSC_HAVE_MUMPS) 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5621 #endif 5622 #if defined(PETSC_HAVE_PASTIX) 5623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5624 #endif 5625 #if defined(PETSC_HAVE_SUPERLU_DIST) 5626 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5627 #endif 5628 #if defined(PETSC_HAVE_CLIQUE) 5629 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5630 #endif 5631 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5632 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5633 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5635 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5636 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5637 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5638 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5641 #if defined(PETSC_HAVE_ELEMENTAL) 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5643 #endif 5644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5646 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5647 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5648 PetscFunctionReturn(0); 5649 } 5650 5651 #undef __FUNCT__ 5652 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5653 /*@ 5654 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5655 and "off-diagonal" part of the matrix in CSR format. 5656 5657 Collective on MPI_Comm 5658 5659 Input Parameters: 5660 + comm - MPI communicator 5661 . m - number of local rows (Cannot be PETSC_DECIDE) 5662 . n - This value should be the same as the local size used in creating the 5663 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5664 calculated if N is given) For square matrices n is almost always m. 5665 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5666 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5667 . i - row indices for "diagonal" portion of matrix 5668 . j - column indices 5669 . a - matrix values 5670 . oi - row indices for "off-diagonal" portion of matrix 5671 . oj - column indices 5672 - oa - matrix values 5673 5674 Output Parameter: 5675 . mat - the matrix 5676 5677 Level: advanced 5678 5679 Notes: 5680 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5681 must free the arrays once the matrix has been destroyed and not before. 5682 5683 The i and j indices are 0 based 5684 5685 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5686 5687 This sets local rows and cannot be used to set off-processor values. 5688 5689 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5690 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5691 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5692 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5693 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5694 communication if it is known that only local entries will be set. 5695 5696 .keywords: matrix, aij, compressed row, sparse, parallel 5697 5698 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5699 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5700 @*/ 5701 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5702 { 5703 PetscErrorCode ierr; 5704 Mat_MPIAIJ *maij; 5705 5706 PetscFunctionBegin; 5707 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5708 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5709 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5710 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5711 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5712 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5713 maij = (Mat_MPIAIJ*) (*mat)->data; 5714 5715 (*mat)->preallocated = PETSC_TRUE; 5716 5717 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5718 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5719 5720 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5721 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5722 5723 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5724 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5725 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5726 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5727 5728 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5729 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5730 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5731 PetscFunctionReturn(0); 5732 } 5733 5734 /* 5735 Special version for direct calls from Fortran 5736 */ 5737 #include <petsc-private/fortranimpl.h> 5738 5739 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5740 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5741 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5742 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5743 #endif 5744 5745 /* Change these macros so can be used in void function */ 5746 #undef CHKERRQ 5747 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5748 #undef SETERRQ2 5749 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5750 #undef SETERRQ3 5751 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5752 #undef SETERRQ 5753 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5754 5755 #undef __FUNCT__ 5756 #define __FUNCT__ "matsetvaluesmpiaij_" 5757 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5758 { 5759 Mat mat = *mmat; 5760 PetscInt m = *mm, n = *mn; 5761 InsertMode addv = *maddv; 5762 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5763 PetscScalar value; 5764 PetscErrorCode ierr; 
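  /*
     Note: the body below mirrors the logic of MatSetValues_MPIAIJ(), inlined here so that it can be
     called directly from Fortran as a void function; the CHKERRQ/SETERRQ macros were redefined above
     to abort on error rather than return an error code.
  */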
5765 5766 MatCheckPreallocated(mat,1); 5767 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5768 5769 #if defined(PETSC_USE_DEBUG) 5770 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5771 #endif 5772 { 5773 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5774 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5775 PetscBool roworiented = aij->roworiented; 5776 5777 /* Some Variables required in the macro */ 5778 Mat A = aij->A; 5779 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5780 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5781 MatScalar *aa = a->a; 5782 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5783 Mat B = aij->B; 5784 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5785 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5786 MatScalar *ba = b->a; 5787 5788 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5789 PetscInt nonew = a->nonew; 5790 MatScalar *ap1,*ap2; 5791 5792 PetscFunctionBegin; 5793 for (i=0; i<m; i++) { 5794 if (im[i] < 0) continue; 5795 #if defined(PETSC_USE_DEBUG) 5796 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5797 #endif 5798 if (im[i] >= rstart && im[i] < rend) { 5799 row = im[i] - rstart; 5800 lastcol1 = -1; 5801 rp1 = aj + ai[row]; 5802 ap1 = aa + ai[row]; 5803 rmax1 = aimax[row]; 5804 nrow1 = ailen[row]; 5805 low1 = 0; 5806 high1 = nrow1; 5807 lastcol2 = -1; 5808 rp2 = bj + bi[row]; 5809 ap2 = ba + bi[row]; 5810 rmax2 = bimax[row]; 5811 nrow2 = bilen[row]; 5812 low2 = 0; 5813 high2 = nrow2; 5814 5815 for (j=0; j<n; j++) { 5816 if (roworiented) value = v[i*n+j]; 5817 else value = v[i+j*m]; 5818 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5819 if (in[j] >= cstart && in[j] < cend) { 5820 col = in[j] - cstart; 5821 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5822 } else if (in[j] < 0) continue; 5823 #if defined(PETSC_USE_DEBUG) 5824 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5825 #endif 5826 else { 5827 if (mat->was_assembled) { 5828 if (!aij->colmap) { 5829 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5830 } 5831 #if defined(PETSC_USE_CTABLE) 5832 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5833 col--; 5834 #else 5835 col = aij->colmap[in[j]] - 1; 5836 #endif 5837 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5838 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5839 col = in[j]; 5840 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5841 B = aij->B; 5842 b = (Mat_SeqAIJ*)B->data; 5843 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5844 rp2 = bj + bi[row]; 5845 ap2 = ba + bi[row]; 5846 rmax2 = bimax[row]; 5847 nrow2 = bilen[row]; 5848 low2 = 0; 5849 high2 = nrow2; 5850 bm = aij->B->rmap->n; 5851 ba = b->a; 5852 } 5853 } else col = in[j]; 5854 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5855 } 5856 } 5857 } else if (!aij->donotstash) { 5858 if (roworiented) { 5859 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5860 } else { 5861 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5862 } 5863 } 5864 } 5865 } 5866 PetscFunctionReturnVoid(); 5867 } 5868 5869
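
/*
   Illustrative usage sketch (not part of the PETSc library source): the man pages above discourage
   MatCreateMPIAIJWithSplitArrays() and recommend assembling a parallel AIJ matrix with MatCreateAIJ()
   plus MatSetValues() instead. The helper below, guarded by "#if 0" so it is never compiled, shows
   that pattern for a hypothetical tridiagonal matrix; the function name, the local size mlocal, and
   the stencil values are invented for illustration only, and the code relies on the headers already
   included at the top of this file.
*/
#if 0
static PetscErrorCode ExampleAssembleTridiagonalAIJ(MPI_Comm comm,PetscInt mlocal,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend,N,ncols,cols[3];
  PetscScalar    vals[3];

  PetscFunctionBegin;
  /* at most 3 nonzeros per row in the diagonal block, at most 1 in the off-diagonal block */
  ierr = MatCreateAIJ(comm,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,1,NULL,A);CHKERRQ(ierr);
  /* only locally owned rows are set below, so off-process communication can be disabled (see the Notes above) */
  ierr = MatSetOption(*A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetSize(*A,&N,NULL);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ncols = 0;
    if (i > 0)   {cols[ncols] = i-1; vals[ncols] = -1.0; ncols++;}
    cols[ncols] = i; vals[ncols] = 2.0; ncols++;
    if (i < N-1) {cols[ncols] = i+1; vals[ncols] = -1.0; ncols++;}
    ierr = MatSetValues(*A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif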