#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
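/*
   A minimal usage sketch prompted by the MATAIJ manual page above (not part of this
   implementation): create an "aij" matrix and call both preallocation routines so the
   same code works with one process (MATSEQAIJ) and with many (MATMPIAIJ).  The local
   size m and the per-row fill estimates (5 diagonal and 2 off-diagonal nonzeros) are
   illustrative assumptions, not values taken from this file.

      Mat            A;
      PetscErrorCode ierr;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          used on one process
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   used on several processes
      ... calls to MatSetValues() ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/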
#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 if (!rank) { 206 bses[0] = gmat->rmap->bs; 207 bses[1] = gmat->cmap->bs; 208 } 209 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 210 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 211 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 212 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 213 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 214 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 215 216 rowners[0] = 0; 217 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 218 rstart = rowners[rank]; 219 rend = rowners[rank+1]; 220 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 221 if (!rank) { 222 gmata = (Mat_SeqAIJ*) gmat->data; 223 /* send row lengths to all processors */ 224 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 225 for (i=1; i<size; i++) { 226 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 227 } 228 /* determine number diagonal and off-diagonal counts */ 229 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 230 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 231 jj = 0; 232 for (i=0; i<m; i++) { 233 for (j=0; j<dlens[i]; j++) { 234 if (gmata->j[jj] < rstart) ld[i]++; 235 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 236 jj++; 237 } 238 } 239 /* send column indices to other processes */ 240 for (i=1; i<size; i++) { 241 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 242 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 } 245 246 /* send numerical values to other processes */ 247 for (i=1; i<size; i++) { 248 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 249 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 250 } 251 gmataa = gmata->a; 252 gmataj = gmata->j; 253 254 } else { 255 /* receive row lengths */ 256 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 257 /* receive column indices */ 258 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 260 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 /* determine number diagonal and off-diagonal counts */ 262 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 264 jj = 0; 265 for (i=0; i<m; i++) { 266 for (j=0; j<dlens[i]; 
j++) { 267 if (gmataj[jj] < rstart) ld[i]++; 268 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 269 jj++; 270 } 271 } 272 /* receive numerical values */ 273 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 274 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 275 } 276 /* set preallocation */ 277 for (i=0; i<m; i++) { 278 dlens[i] -= olens[i]; 279 } 280 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 281 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 282 283 for (i=0; i<m; i++) { 284 dlens[i] += olens[i]; 285 } 286 cnt = 0; 287 for (i=0; i<m; i++) { 288 row = rstart + i; 289 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 290 cnt += dlens[i]; 291 } 292 if (rank) { 293 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 294 } 295 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 296 ierr = PetscFree(rowners);CHKERRQ(ierr); 297 298 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 299 300 *inmat = mat; 301 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 302 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 303 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 304 mat = *inmat; 305 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 306 if (!rank) { 307 /* send numerical values to other processes */ 308 gmata = (Mat_SeqAIJ*) gmat->data; 309 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 310 gmataa = gmata->a; 311 for (i=1; i<size; i++) { 312 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 313 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 314 } 315 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 316 } else { 317 /* receive numerical values from process 0*/ 318 nz = Ad->nz + Ao->nz; 319 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 320 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 321 } 322 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 323 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 324 ad = Ad->a; 325 ao = Ao->a; 326 if (mat->rmap->n) { 327 i = 0; 328 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 329 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 330 } 331 for (i=1; i<mat->rmap->n; i++) { 332 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 333 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 334 } 335 i--; 336 if (mat->rmap->n) { 337 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 338 } 339 if (rank) { 340 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 341 } 342 } 343 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 /* 349 Local utility routine that creates a mapping from the global column 350 number to the local number in the off-diagonal part of the local 351 storage of the matrix. 
When PETSC_USE_CTABLE is used this is scalable at 352 a slightly higher hash table cost; without it it is not scalable (each processor 353 has an order N integer array but is fast to acess. 354 */ 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 358 { 359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 360 PetscErrorCode ierr; 361 PetscInt n = aij->B->cmap->n,i; 362 363 PetscFunctionBegin; 364 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 365 #if defined(PETSC_USE_CTABLE) 366 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 367 for (i=0; i<n; i++) { 368 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 369 } 370 #else 371 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 372 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 373 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 374 #endif 375 PetscFunctionReturn(0); 376 } 377 378 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 379 { \ 380 if (col <= lastcol1) low1 = 0; \ 381 else high1 = nrow1; \ 382 lastcol1 = col;\ 383 while (high1-low1 > 5) { \ 384 t = (low1+high1)/2; \ 385 if (rp1[t] > col) high1 = t; \ 386 else low1 = t; \ 387 } \ 388 for (_i=low1; _i<high1; _i++) { \ 389 if (rp1[_i] > col) break; \ 390 if (rp1[_i] == col) { \ 391 if (addv == ADD_VALUES) ap1[_i] += value; \ 392 else ap1[_i] = value; \ 393 goto a_noinsert; \ 394 } \ 395 } \ 396 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 397 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 398 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 399 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 400 N = nrow1++ - 1; a->nz++; high1++; \ 401 /* shift up all the later entries in this row */ \ 402 for (ii=N; ii>=_i; ii--) { \ 403 rp1[ii+1] = rp1[ii]; \ 404 ap1[ii+1] = ap1[ii]; \ 405 } \ 406 rp1[_i] = col; \ 407 ap1[_i] = value; \ 408 a_noinsert: ; \ 409 ailen[row] = nrow1; \ 410 } 411 412 413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 414 { \ 415 if (col <= lastcol2) low2 = 0; \ 416 else high2 = nrow2; \ 417 lastcol2 = col; \ 418 while (high2-low2 > 5) { \ 419 t = (low2+high2)/2; \ 420 if (rp2[t] > col) high2 = t; \ 421 else low2 = t; \ 422 } \ 423 for (_i=low2; _i<high2; _i++) { \ 424 if (rp2[_i] > col) break; \ 425 if (rp2[_i] == col) { \ 426 if (addv == ADD_VALUES) ap2[_i] += value; \ 427 else ap2[_i] = value; \ 428 goto b_noinsert; \ 429 } \ 430 } \ 431 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 432 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 433 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 434 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 435 N = nrow2++ - 1; b->nz++; high2++; \ 436 /* shift up all the later entries in this row */ \ 437 for (ii=N; ii>=_i; ii--) { \ 438 rp2[ii+1] = rp2[ii]; \ 439 ap2[ii+1] = ap2[ii]; \ 440 } \ 441 rp2[_i] = col; \ 442 ap2[_i] = value; \ 443 b_noinsert: ; \ 444 bilen[row] = nrow2; \ 445 } 446 447 #undef __FUNCT__ 448 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 449 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat 
A,PetscInt row,const PetscScalar v[]) 450 { 451 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 452 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 453 PetscErrorCode ierr; 454 PetscInt l,*garray = mat->garray,diag; 455 456 PetscFunctionBegin; 457 /* code only works for square matrices A */ 458 459 /* find size of row to the left of the diagonal part */ 460 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 461 row = row - diag; 462 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 463 if (garray[b->j[b->i[row]+l]] > diag) break; 464 } 465 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 466 467 /* diagonal part */ 468 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 469 470 /* right of diagonal part */ 471 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatSetValues_MPIAIJ" 477 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 478 { 479 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 480 PetscScalar value; 481 PetscErrorCode ierr; 482 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 483 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 484 PetscBool roworiented = aij->roworiented; 485 486 /* Some Variables required in the macro */ 487 Mat A = aij->A; 488 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 489 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 490 MatScalar *aa = a->a; 491 PetscBool ignorezeroentries = a->ignorezeroentries; 492 Mat B = aij->B; 493 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 494 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 495 MatScalar *ba = b->a; 496 497 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 498 PetscInt nonew; 499 MatScalar *ap1,*ap2; 500 501 PetscFunctionBegin; 502 if (v) PetscValidScalarPointer(v,6); 503 for (i=0; i<m; i++) { 504 if (im[i] < 0) continue; 505 #if defined(PETSC_USE_DEBUG) 506 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 507 #endif 508 if (im[i] >= rstart && im[i] < rend) { 509 row = im[i] - rstart; 510 lastcol1 = -1; 511 rp1 = aj + ai[row]; 512 ap1 = aa + ai[row]; 513 rmax1 = aimax[row]; 514 nrow1 = ailen[row]; 515 low1 = 0; 516 high1 = nrow1; 517 lastcol2 = -1; 518 rp2 = bj + bi[row]; 519 ap2 = ba + bi[row]; 520 rmax2 = bimax[row]; 521 nrow2 = bilen[row]; 522 low2 = 0; 523 high2 = nrow2; 524 525 for (j=0; j<n; j++) { 526 if (v) { 527 if (roworiented) value = v[i*n+j]; 528 else value = v[i+j*m]; 529 } else value = 0.0; 530 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 531 if (in[j] >= cstart && in[j] < cend) { 532 col = in[j] - cstart; 533 nonew = a->nonew; 534 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 535 } else if (in[j] < 0) continue; 536 #if defined(PETSC_USE_DEBUG) 537 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 538 #endif 539 else { 540 if (mat->was_assembled) { 541 if (!aij->colmap) { 542 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 543 } 544 #if defined(PETSC_USE_CTABLE) 545 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 546 col--; 547 #else 548 col = aij->colmap[in[j]] - 1; 549 #endif 550 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 551 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 552 col = in[j]; 553 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 554 B = aij->B; 555 b = (Mat_SeqAIJ*)B->data; 556 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 557 rp2 = bj + bi[row]; 558 ap2 = ba + bi[row]; 559 rmax2 = bimax[row]; 560 nrow2 = bilen[row]; 561 low2 = 0; 562 high2 = nrow2; 563 bm = aij->B->rmap->n; 564 ba = b->a; 565 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 566 } else col = in[j]; 567 nonew = b->nonew; 568 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 569 } 570 } 571 } else { 572 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 573 if (!aij->donotstash) { 574 mat->assembled = PETSC_FALSE; 575 if (roworiented) { 576 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 577 } else { 578 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 579 } 580 } 581 } 582 } 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatGetValues_MPIAIJ" 588 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 589 { 590 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 591 PetscErrorCode ierr; 592 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 593 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 594 595 PetscFunctionBegin; 596 for (i=0; i<m; i++) { 597 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 598 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 599 if (idxm[i] >= rstart && idxm[i] < rend) { 600 row = idxm[i] - rstart; 601 for (j=0; j<n; j++) { 602 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 603 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 604 if (idxn[j] >= cstart && idxn[j] < cend) { 605 col = idxn[j] - cstart; 606 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 607 } else { 608 if (!aij->colmap) { 609 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 610 } 611 #if defined(PETSC_USE_CTABLE) 612 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 613 col--; 614 #else 615 col = aij->colmap[idxn[j]] - 1; 616 #endif 617 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 618 else { 619 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 620 } 621 } 622 } 623 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 624 } 625 PetscFunctionReturn(0); 626 } 627 628 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 632 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 633 { 634 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 695 /* 696 if nonzero structure of submatrix B cannot change then we know that 697 no processor disassembled thus we can skip this stuff 698 */ 699 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 700 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 701 if (mat->was_assembled && !other_disassembled) { 702 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 } 705 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 706 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 709 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 711 712 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 713 714 aij->rowvalues = 0; 715 716 /* used by MatAXPY() */ 717 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 718 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 719 720 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 721 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 727 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 728 { 729 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 730 PetscErrorCode ierr; 731 732 PetscFunctionBegin; 733 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 734 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatZeroRows_MPIAIJ" 740 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 741 { 742 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 743 PetscInt *owners = A->rmap->range; 744 PetscInt n = A->rmap->n; 745 PetscMPIInt size = mat->size; 746 PetscSF sf; 747 PetscInt *lrows; 748 PetscSFNode *rrows; 749 PetscInt lastidx = -1, r, p = 0, len = 0; 750 PetscErrorCode ierr; 751 752 PetscFunctionBegin; 753 /* Create SF where leaves are input rows and roots are owned rows */ 754 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 755 for (r = 0; r < n; ++r) lrows[r] = -1; 756 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 757 for (r = 0; r < N; ++r) { 758 const PetscInt idx = rows[r]; 759 PetscBool found = PETSC_FALSE; 760 /* Trick for efficient searching for sorted rows */ 761 if (lastidx > idx) p = 0; 762 lastidx = idx; 763 for (; p < size; ++p) { 764 if (idx >= owners[p] && idx < owners[p+1]) { 765 rrows[r].rank = p; 766 rrows[r].index = rows[r] - owners[p]; 767 found = PETSC_TRUE; 768 break; 769 } 770 } 771 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 772 } 773 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 774 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 775 /* Collect flags for rows to be zeroed */ 776 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 777 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 778 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 779 /* Compress and put in row numbers */ 780 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 781 /* fix right hand side if needed */ 782 if (x && b) { 783 const PetscScalar *xx; 784 PetscScalar *bb; 785 786 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 787 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 788 for 
(r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 789 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 790 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 791 } 792 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 793 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 794 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 795 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 796 } else if (diag != 0.0) { 797 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 798 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 799 for (r = 0; r < len; ++r) { 800 const PetscInt row = lrows[r] + A->rmap->rstart; 801 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 802 } 803 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 804 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 805 } else { 806 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 807 } 808 ierr = PetscFree(lrows);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 814 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 815 { 816 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 817 PetscErrorCode ierr; 818 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 819 PetscInt i,*owners = A->rmap->range; 820 PetscInt *sizes,j,idx,nsends; 821 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 822 PetscInt *rvalues,count,base,slen,*source; 823 PetscInt *lens,*lrows,*values,m; 824 MPI_Comm comm; 825 MPI_Request *send_waits,*recv_waits; 826 MPI_Status recv_status,*send_status; 827 const PetscScalar *xx; 828 PetscScalar *bb,*mask; 829 Vec xmask,lmask; 830 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 831 const PetscInt *aj, *ii,*ridx; 832 PetscScalar *aa; 833 #if defined(PETSC_DEBUG) 834 PetscBool found = PETSC_FALSE; 835 #endif 836 837 PetscFunctionBegin; 838 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 839 /* first count number of contributors to each processor */ 840 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 841 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 842 j = 0; 843 for (i=0; i<N; i++) { 844 if (lastidx > (idx = rows[i])) j = 0; 845 lastidx = idx; 846 for (; j<size; j++) { 847 if (idx >= owners[j] && idx < owners[j+1]) { 848 sizes[2*j]++; 849 sizes[2*j+1] = 1; 850 owner[i] = j; 851 #if defined(PETSC_DEBUG) 852 found = PETSC_TRUE; 853 #endif 854 break; 855 } 856 } 857 #if defined(PETSC_DEBUG) 858 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 859 found = PETSC_FALSE; 860 #endif 861 } 862 nsends = 0; for (i=0; i<size; i++) nsends += sizes[2*i+1]; 863 864 /* inform other processors of number of messages and max length*/ 865 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 866 867 /* post receives: */ 868 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 869 ierr = PetscMalloc1((nrecvs+1),&recv_waits);CHKERRQ(ierr); 870 for (i=0; i<nrecvs; i++) { 871 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 872 } 873 874 /* do sends: 875 1) starts[i] gives the starting index in svalues for 
stuff going to 876 the ith processor 877 */ 878 ierr = PetscMalloc1((N+1),&svalues);CHKERRQ(ierr); 879 ierr = PetscMalloc1((nsends+1),&send_waits);CHKERRQ(ierr); 880 ierr = PetscMalloc1((size+1),&starts);CHKERRQ(ierr); 881 882 starts[0] = 0; 883 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 884 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 885 886 starts[0] = 0; 887 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 888 count = 0; 889 for (i=0; i<size; i++) { 890 if (sizes[2*i+1]) { 891 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 892 } 893 } 894 ierr = PetscFree(starts);CHKERRQ(ierr); 895 896 base = owners[rank]; 897 898 /* wait on receives */ 899 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 900 count = nrecvs; slen = 0; 901 while (count) { 902 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 903 /* unpack receives into our local space */ 904 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 905 906 source[imdex] = recv_status.MPI_SOURCE; 907 lens[imdex] = n; 908 slen += n; 909 count--; 910 } 911 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 912 913 /* move the data into the send scatter */ 914 ierr = PetscMalloc1((slen+1),&lrows);CHKERRQ(ierr); 915 count = 0; 916 for (i=0; i<nrecvs; i++) { 917 values = rvalues + i*nmax; 918 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 919 } 920 ierr = PetscFree(rvalues);CHKERRQ(ierr); 921 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 922 ierr = PetscFree(owner);CHKERRQ(ierr); 923 ierr = PetscFree(sizes);CHKERRQ(ierr); 924 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 925 926 /* zero diagonal part of matrix */ 927 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 928 929 /* handle off diagonal part of matrix */ 930 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 931 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 932 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 933 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 934 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 935 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 937 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 938 if (x) { 939 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 942 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 943 } 944 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 945 946 /* remove zeroed rows of off diagonal matrix */ 947 ii = aij->i; 948 for (i=0; i<slen; i++) { 949 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 950 } 951 952 /* loop over all elements of off process part of matrix zeroing removed columns*/ 953 if (aij->compressedrow.use) { 954 m = aij->compressedrow.nrows; 955 ii = aij->compressedrow.i; 956 ridx = aij->compressedrow.rindex; 957 for (i=0; i<m; i++) { 958 n = ii[i+1] - ii[i]; 959 aj = aij->j + ii[i]; 960 aa = aij->a + ii[i]; 961 962 for (j=0; j<n; j++) { 963 if (PetscAbsScalar(mask[*aj])) { 964 if (b) bb[*ridx] -= *aa*xx[*aj]; 965 *aa = 0.0; 966 } 967 aa++; 968 aj++; 969 } 970 ridx++; 971 } 972 } else { /* do not use compressed row format */ 973 m = l->B->rmap->n; 974 for (i=0; i<m; i++) { 975 n = ii[i+1] - 
ii[i]; 976 aj = aij->j + ii[i]; 977 aa = aij->a + ii[i]; 978 for (j=0; j<n; j++) { 979 if (PetscAbsScalar(mask[*aj])) { 980 if (b) bb[i] -= *aa*xx[*aj]; 981 *aa = 0.0; 982 } 983 aa++; 984 aj++; 985 } 986 } 987 } 988 if (x) { 989 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 990 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 991 } 992 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 993 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 994 ierr = PetscFree(lrows);CHKERRQ(ierr); 995 996 /* wait on sends */ 997 if (nsends) { 998 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 999 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1000 ierr = PetscFree(send_status);CHKERRQ(ierr); 1001 } 1002 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1003 ierr = PetscFree(svalues);CHKERRQ(ierr); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatMult_MPIAIJ" 1009 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1010 { 1011 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1012 PetscErrorCode ierr; 1013 PetscInt nt; 1014 1015 PetscFunctionBegin; 1016 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1017 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1018 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1019 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1020 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1021 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1027 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1028 { 1029 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1030 PetscErrorCode ierr; 1031 1032 PetscFunctionBegin; 1033 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1039 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1040 { 1041 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1042 PetscErrorCode ierr; 1043 1044 PetscFunctionBegin; 1045 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1047 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1048 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1054 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1055 { 1056 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1057 PetscErrorCode ierr; 1058 PetscBool merged; 1059 1060 PetscFunctionBegin; 1061 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1062 /* do nondiagonal part */ 1063 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1064 if (!merged) { 1065 /* send it on its way */ 1066 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1067 /* do local part */ 1068 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1069 /* receive remote parts: note this assumes the values are not actually */ 1070 /* added in yy until the next line, */ 1071 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1072 } else { 1073 /* do local part */ 1074 ierr = 
(*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1075 /* send it on its way */ 1076 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1077 /* values actually were received in the Begin() but we need to call this nop */ 1078 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1079 } 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1085 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1086 { 1087 MPI_Comm comm; 1088 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1089 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1090 IS Me,Notme; 1091 PetscErrorCode ierr; 1092 PetscInt M,N,first,last,*notme,i; 1093 PetscMPIInt size; 1094 1095 PetscFunctionBegin; 1096 /* Easy test: symmetric diagonal block */ 1097 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1098 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1099 if (!*f) PetscFunctionReturn(0); 1100 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1101 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1102 if (size == 1) PetscFunctionReturn(0); 1103 1104 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1105 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1106 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1107 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1108 for (i=0; i<first; i++) notme[i] = i; 1109 for (i=last; i<M; i++) notme[i-last+first] = i; 1110 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1111 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1112 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1113 Aoff = Aoffs[0]; 1114 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1115 Boff = Boffs[0]; 1116 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1117 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1118 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1119 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1120 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1121 ierr = PetscFree(notme);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1127 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1128 { 1129 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1130 PetscErrorCode ierr; 1131 1132 PetscFunctionBegin; 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 /* send it on its way */ 1136 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1137 /* do local part */ 1138 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1139 /* receive remote parts */ 1140 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 /* 1145 This only works correctly for square matrices where the subblock A->A is the 1146 diagonal block 1147 */ 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1150 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1151 { 1152 PetscErrorCode ierr; 1153 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1154 1155 PetscFunctionBegin; 1156 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1157 if (A->rmap->rstart != 
A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1158 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatScale_MPIAIJ" 1164 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1165 { 1166 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1171 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatDestroy_MPIAIJ" 1177 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1178 { 1179 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1180 PetscErrorCode ierr; 1181 1182 PetscFunctionBegin; 1183 #if defined(PETSC_USE_LOG) 1184 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1185 #endif 1186 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1187 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1188 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1189 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1190 #if defined(PETSC_USE_CTABLE) 1191 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1192 #else 1193 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1194 #endif 1195 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1196 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1197 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1198 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1199 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1200 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1201 1202 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1203 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1204 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1205 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1206 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1207 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1208 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1209 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1210 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 #undef __FUNCT__ 1215 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1216 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1217 { 1218 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1219 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1220 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1221 PetscErrorCode ierr; 1222 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1223 int fd; 1224 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1225 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1226 PetscScalar *column_values; 1227 PetscInt message_count,flowcontrolcount; 1228 FILE *file; 1229 1230 PetscFunctionBegin; 1231 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1232 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1233 nz = A->nz + B->nz; 1234 if (!rank) { 1235 header[0] = MAT_FILE_CLASSID; 1236 header[1] = mat->rmap->N; 1237 header[2] = 
mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs a buffer as large as the largest number of nonzeros on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr =
PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1301 } else { 1302 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1303 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1304 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1305 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1306 } 1307 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1308 1309 /* load up the local column values */ 1310 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1311 cnt = 0; 1312 for (i=0; i<mat->rmap->n; i++) { 1313 for (j=B->i[i]; j<B->i[i+1]; j++) { 1314 if (garray[B->j[j]] > cstart) break; 1315 column_values[cnt++] = B->a[j]; 1316 } 1317 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1318 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1319 } 1320 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1321 1322 /* store the column values to the file */ 1323 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1324 if (!rank) { 1325 MPI_Status status; 1326 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1327 for (i=1; i<size; i++) { 1328 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1329 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1330 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1331 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1332 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1333 } 1334 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1335 } else { 1336 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1337 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1338 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1339 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1340 } 1341 ierr = PetscFree(column_values);CHKERRQ(ierr); 1342 1343 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1344 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1345 PetscFunctionReturn(0); 1346 } 1347 1348 #include <petscdraw.h> 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1351 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1352 { 1353 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1354 PetscErrorCode ierr; 1355 PetscMPIInt rank = aij->rank,size = aij->size; 1356 PetscBool isdraw,iascii,isbinary; 1357 PetscViewer sviewer; 1358 PetscViewerFormat format; 1359 1360 PetscFunctionBegin; 1361 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1362 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1363 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1364 if (iascii) { 1365 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1366 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1367 MatInfo info; 1368 PetscBool 
inodes; 1369 1370 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1371 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1372 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1373 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1374 if (!inodes) { 1375 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1376 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1377 } else { 1378 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1379 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1380 } 1381 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1382 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1383 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1384 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1385 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1386 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1387 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1388 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1391 PetscInt inodecount,inodelimit,*inodes; 1392 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1393 if (inodes) { 1394 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1395 } else { 1396 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1397 } 1398 PetscFunctionReturn(0); 1399 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1400 PetscFunctionReturn(0); 1401 } 1402 } else if (isbinary) { 1403 if (size == 1) { 1404 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1405 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1406 } else { 1407 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1408 } 1409 PetscFunctionReturn(0); 1410 } else if (isdraw) { 1411 PetscDraw draw; 1412 PetscBool isnull; 1413 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1414 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1415 } 1416 1417 if (size == 1) { 1418 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1419 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1420 } else { 1421 /* assemble the entire matrix onto first processor. 
*/ 1422 Mat A; 1423 Mat_SeqAIJ *Aloc; 1424 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1425 MatScalar *a; 1426 1427 if (mat->rmap->N > 1024) { 1428 PetscBool flg = PETSC_FALSE; 1429 1430 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1431 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1432 } 1433 1434 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1435 if (!rank) { 1436 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1437 } else { 1438 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1439 } 1440 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1441 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1442 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1443 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1444 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1445 1446 /* copy over the A part */ 1447 Aloc = (Mat_SeqAIJ*)aij->A->data; 1448 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1449 row = mat->rmap->rstart; 1450 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1451 for (i=0; i<m; i++) { 1452 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1453 row++; 1454 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1455 } 1456 aj = Aloc->j; 1457 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1458 1459 /* copy over the B part */ 1460 Aloc = (Mat_SeqAIJ*)aij->B->data; 1461 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1462 row = mat->rmap->rstart; 1463 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1464 ct = cols; 1465 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1466 for (i=0; i<m; i++) { 1467 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1468 row++; 1469 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1470 } 1471 ierr = PetscFree(ct);CHKERRQ(ierr); 1472 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1473 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1474 /* 1475 Everyone has to call to draw the matrix since the graphics waits are 1476 synchronized across all processors that share the PetscDraw object 1477 */ 1478 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1479 if (!rank) { 1480 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1481 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1482 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1483 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1484 } 1485 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1486 ierr = MatDestroy(&A);CHKERRQ(ierr); 1487 } 1488 PetscFunctionReturn(0); 1489 } 1490 1491 #undef __FUNCT__ 1492 #define __FUNCT__ "MatView_MPIAIJ" 1493 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1494 { 1495 PetscErrorCode ierr; 1496 PetscBool iascii,isdraw,issocket,isbinary; 1497 1498 PetscFunctionBegin; 1499 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1500 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1501 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1502 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1503 if (iascii || isdraw || isbinary || issocket) { 1504 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1505 } 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatSOR_MPIAIJ" 1511 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1512 { 1513 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1514 PetscErrorCode ierr; 1515 Vec bb1 = 0; 1516 PetscBool hasop; 1517 1518 PetscFunctionBegin; 1519 if (flag == SOR_APPLY_UPPER) { 1520 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1521 PetscFunctionReturn(0); 1522 } 1523 1524 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1525 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1526 } 1527 1528 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1529 if (flag & SOR_ZERO_INITIAL_GUESS) { 1530 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1531 its--; 1532 } 1533 1534 while (its--) { 1535 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1536 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1537 1538 /* update rhs: bb1 = bb - B*x */ 1539 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1540 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1541 1542 /* local sweep */ 1543 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1544 } 1545 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1546 if (flag & SOR_ZERO_INITIAL_GUESS) { 1547 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1548 its--; 1549 } 1550 while (its--) { 1551 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 1554 /* update rhs: bb1 = bb - B*x */ 1555 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1556 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1557 1558 /* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1560 } 1561 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1562 if (flag & SOR_ZERO_INITIAL_GUESS) { 1563 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1564 its--; 1565 } 1566 while (its--) { 1567 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* update rhs: bb1 = bb - B*x */ 1571 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1572 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1576 } 1577 } else if (flag & SOR_EISENSTAT) { 1578 Vec xx1; 1579 1580 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1581 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1582 
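    /*
       Eisenstat variant, in outline: xx now holds the local backward sweep computed with a
       zero initial guess.  Below, the ghost values of xx are scattered into mat->lvec, a
       modified right-hand side bb1 = bb + ((omega-2)/omega)*D*xx + B*xx_ghost is formed
       (D*xx via MatMultDiagonalBlock() when that operation is available, otherwise a
       pointwise multiply with the stored diagonal), and a local forward sweep on bb1 yields
       the correction xx1 that is added to xx.
    */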
1583 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 if (!mat->diag) { 1586 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1587 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1588 } 1589 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1590 if (hasop) { 1591 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1592 } else { 1593 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1594 } 1595 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1596 1597 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1598 1599 /* local sweep */ 1600 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1601 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1602 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1603 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1604 1605 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatPermute_MPIAIJ" 1611 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1612 { 1613 Mat aA,aB,Aperm; 1614 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1615 PetscScalar *aa,*ba; 1616 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1617 PetscSF rowsf,sf; 1618 IS parcolp = NULL; 1619 PetscBool done; 1620 PetscErrorCode ierr; 1621 1622 PetscFunctionBegin; 1623 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1624 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1625 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1626 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1627 1628 /* Invert row permutation to find out where my rows should go */ 1629 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1630 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1631 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1632 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1633 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1634 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1635 1636 /* Invert column permutation to find out where my columns should go */ 1637 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1638 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1639 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1640 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1641 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1642 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1643 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1644 1645 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1646 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1647 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1648 1649 /* Find out where my gcols should go */ 1650 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1651 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1652 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1653 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1654 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 
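  /*
     The broadcast below pulls, for every ghost column listed in gcols, the permuted global
     index of that column from its owner: the roots of sf are the locally owned columns
     (holding cdest) and the leaves are the ghost columns, so gcdest[k] receives the
     destination of ghost column k under the column permutation.
  */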
1655 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1656 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1657 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1658 1659 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1660 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1661 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1662 for (i=0; i<m; i++) { 1663 PetscInt row = rdest[i],rowner; 1664 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1665 for (j=ai[i]; j<ai[i+1]; j++) { 1666 PetscInt cowner,col = cdest[aj[j]]; 1667 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1668 if (rowner == cowner) dnnz[i]++; 1669 else onnz[i]++; 1670 } 1671 for (j=bi[i]; j<bi[i+1]; j++) { 1672 PetscInt cowner,col = gcdest[bj[j]]; 1673 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1674 if (rowner == cowner) dnnz[i]++; 1675 else onnz[i]++; 1676 } 1677 } 1678 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1679 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1680 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1681 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1682 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1683 1684 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1685 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1686 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1687 for (i=0; i<m; i++) { 1688 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1689 PetscInt rowlen; 1690 rowlen = ai[i+1] - ai[i]; 1691 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1692 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1693 rowlen = bi[i+1] - bi[i]; 1694 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1695 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1696 } 1697 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1698 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1700 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1701 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1702 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1703 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1704 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1705 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1706 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1707 *B = Aperm; 1708 PetscFunctionReturn(0); 1709 } 1710 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1713 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1714 { 1715 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1716 Mat A = mat->A,B = mat->B; 1717 PetscErrorCode ierr; 1718 PetscReal isend[5],irecv[5]; 1719 1720 PetscFunctionBegin; 1721 info->block_size = 1.0; 1722 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1723 1724 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1725 isend[3] = info->memory; isend[4] = info->mallocs; 1726 1727 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1728 1729 isend[0] += info->nz_used; 
isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1730 isend[3] += info->memory; isend[4] += info->mallocs; 1731 if (flag == MAT_LOCAL) { 1732 info->nz_used = isend[0]; 1733 info->nz_allocated = isend[1]; 1734 info->nz_unneeded = isend[2]; 1735 info->memory = isend[3]; 1736 info->mallocs = isend[4]; 1737 } else if (flag == MAT_GLOBAL_MAX) { 1738 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1739 1740 info->nz_used = irecv[0]; 1741 info->nz_allocated = irecv[1]; 1742 info->nz_unneeded = irecv[2]; 1743 info->memory = irecv[3]; 1744 info->mallocs = irecv[4]; 1745 } else if (flag == MAT_GLOBAL_SUM) { 1746 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1747 1748 info->nz_used = irecv[0]; 1749 info->nz_allocated = irecv[1]; 1750 info->nz_unneeded = irecv[2]; 1751 info->memory = irecv[3]; 1752 info->mallocs = irecv[4]; 1753 } 1754 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1755 info->fill_ratio_needed = 0; 1756 info->factor_mallocs = 0; 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatSetOption_MPIAIJ" 1762 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1763 { 1764 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1765 PetscErrorCode ierr; 1766 1767 PetscFunctionBegin; 1768 switch (op) { 1769 case MAT_NEW_NONZERO_LOCATIONS: 1770 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1771 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1772 case MAT_KEEP_NONZERO_PATTERN: 1773 case MAT_NEW_NONZERO_LOCATION_ERR: 1774 case MAT_USE_INODES: 1775 case MAT_IGNORE_ZERO_ENTRIES: 1776 MatCheckPreallocated(A,1); 1777 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1778 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1779 break; 1780 case MAT_ROW_ORIENTED: 1781 a->roworiented = flg; 1782 1783 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1784 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1785 break; 1786 case MAT_NEW_DIAGONALS: 1787 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1788 break; 1789 case MAT_IGNORE_OFF_PROC_ENTRIES: 1790 a->donotstash = flg; 1791 break; 1792 case MAT_SPD: 1793 A->spd_set = PETSC_TRUE; 1794 A->spd = flg; 1795 if (flg) { 1796 A->symmetric = PETSC_TRUE; 1797 A->structurally_symmetric = PETSC_TRUE; 1798 A->symmetric_set = PETSC_TRUE; 1799 A->structurally_symmetric_set = PETSC_TRUE; 1800 } 1801 break; 1802 case MAT_SYMMETRIC: 1803 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1804 break; 1805 case MAT_STRUCTURALLY_SYMMETRIC: 1806 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1807 break; 1808 case MAT_HERMITIAN: 1809 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1810 break; 1811 case MAT_SYMMETRY_ETERNAL: 1812 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1813 break; 1814 default: 1815 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1816 } 1817 PetscFunctionReturn(0); 1818 } 1819 1820 #undef __FUNCT__ 1821 #define __FUNCT__ "MatGetRow_MPIAIJ" 1822 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1823 { 1824 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1825 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1826 PetscErrorCode ierr; 1827 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1828 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1829 PetscInt *cmap,*idx_p; 1830 1831 PetscFunctionBegin; 1832 if (mat->getrowactive) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1833 mat->getrowactive = PETSC_TRUE; 1834 1835 if (!mat->rowvalues && (idx || v)) { 1836 /* 1837 allocate enough space to hold information from the longest row. 1838 */ 1839 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1840 PetscInt max = 1,tmp; 1841 for (i=0; i<matin->rmap->n; i++) { 1842 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1843 if (max < tmp) max = tmp; 1844 } 1845 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1846 } 1847 1848 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1849 lrow = row - rstart; 1850 1851 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1852 if (!v) {pvA = 0; pvB = 0;} 1853 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1854 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1855 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1856 nztot = nzA + nzB; 1857 1858 cmap = mat->garray; 1859 if (v || idx) { 1860 if (nztot) { 1861 /* Sort by increasing column numbers, assuming A and B already sorted */ 1862 PetscInt imark = -1; 1863 if (v) { 1864 *v = v_p = mat->rowvalues; 1865 for (i=0; i<nzB; i++) { 1866 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1867 else break; 1868 } 1869 imark = i; 1870 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1871 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1872 } 1873 if (idx) { 1874 *idx = idx_p = mat->rowindices; 1875 if (imark > -1) { 1876 for (i=0; i<imark; i++) { 1877 idx_p[i] = cmap[cworkB[i]]; 1878 } 1879 } else { 1880 for (i=0; i<nzB; i++) { 1881 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1882 else break; 1883 } 1884 imark = i; 1885 } 1886 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1887 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1888 } 1889 } else { 1890 if (idx) *idx = 0; 1891 if (v) *v = 0; 1892 } 1893 } 1894 *nz = nztot; 1895 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1896 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1902 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1903 { 1904 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1905 1906 PetscFunctionBegin; 1907 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1908 aij->getrowactive = PETSC_FALSE; 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatNorm_MPIAIJ" 1914 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1915 { 1916 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1917 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1918 PetscErrorCode ierr; 1919 PetscInt i,j,cstart = mat->cmap->rstart; 1920 PetscReal sum = 0.0; 1921 MatScalar *v; 1922 1923 PetscFunctionBegin; 1924 if (aij->size == 1) { 1925 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1926 } else { 1927 if (type == NORM_FROBENIUS) { 1928 v = amat->a; 1929 for (i=0; i<amat->nz; i++) { 1930 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1931 } 1932 v = bmat->a; 1933 for (i=0; i<bmat->nz; i++) { 1934 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1935 } 1936 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1937 
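      /*
         Each process contributed the sum of |a_ij|^2 over the entries of its local A and B
         blocks, so after the reduction the Frobenius norm is the square root of the
         globally summed value.
      */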
*norm = PetscSqrtReal(*norm); 1938 } else if (type == NORM_1) { /* max column norm */ 1939 PetscReal *tmp,*tmp2; 1940 PetscInt *jj,*garray = aij->garray; 1941 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1942 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1943 *norm = 0.0; 1944 v = amat->a; jj = amat->j; 1945 for (j=0; j<amat->nz; j++) { 1946 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1947 } 1948 v = bmat->a; jj = bmat->j; 1949 for (j=0; j<bmat->nz; j++) { 1950 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1951 } 1952 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1953 for (j=0; j<mat->cmap->N; j++) { 1954 if (tmp2[j] > *norm) *norm = tmp2[j]; 1955 } 1956 ierr = PetscFree(tmp);CHKERRQ(ierr); 1957 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1958 } else if (type == NORM_INFINITY) { /* max row norm */ 1959 PetscReal ntemp = 0.0; 1960 for (j=0; j<aij->A->rmap->n; j++) { 1961 v = amat->a + amat->i[j]; 1962 sum = 0.0; 1963 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1964 sum += PetscAbsScalar(*v); v++; 1965 } 1966 v = bmat->a + bmat->i[j]; 1967 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1968 sum += PetscAbsScalar(*v); v++; 1969 } 1970 if (sum > ntemp) ntemp = sum; 1971 } 1972 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1973 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1974 } 1975 PetscFunctionReturn(0); 1976 } 1977 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "MatTranspose_MPIAIJ" 1980 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1981 { 1982 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1983 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1984 PetscErrorCode ierr; 1985 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1986 PetscInt cstart = A->cmap->rstart,ncol; 1987 Mat B; 1988 MatScalar *array; 1989 1990 PetscFunctionBegin; 1991 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1992 1993 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1994 ai = Aloc->i; aj = Aloc->j; 1995 bi = Bloc->i; bj = Bloc->j; 1996 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1997 PetscInt *d_nnz,*g_nnz,*o_nnz; 1998 PetscSFNode *oloc; 1999 PETSC_UNUSED PetscSF sf; 2000 2001 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2002 /* compute d_nnz for preallocation */ 2003 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2004 for (i=0; i<ai[ma]; i++) { 2005 d_nnz[aj[i]]++; 2006 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2007 } 2008 /* compute local off-diagonal contributions */ 2009 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2010 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2011 /* map those to global */ 2012 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2013 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2014 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2015 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2016 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2017 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2018 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2019 2020 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2021 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2022 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2023 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2024 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2025 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2026 } else { 2027 B = *matout; 2028 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2029 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2030 } 2031 2032 /* copy over the A part */ 2033 array = Aloc->a; 2034 row = A->rmap->rstart; 2035 for (i=0; i<ma; i++) { 2036 ncol = ai[i+1]-ai[i]; 2037 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2038 row++; 2039 array += ncol; aj += ncol; 2040 } 2041 aj = Aloc->j; 2042 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2043 2044 /* copy over the B part */ 2045 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2046 array = Bloc->a; 2047 row = A->rmap->rstart; 2048 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2049 cols_tmp = cols; 2050 for (i=0; i<mb; i++) { 2051 ncol = bi[i+1]-bi[i]; 2052 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2053 row++; 2054 array += ncol; cols_tmp += ncol; 2055 } 2056 ierr = PetscFree(cols);CHKERRQ(ierr); 2057 2058 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2059 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2060 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2061 *matout = B; 2062 } else { 2063 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2064 } 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2070 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2071 { 2072 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2073 Mat a = aij->A,b = aij->B; 2074 PetscErrorCode ierr; 2075 PetscInt s1,s2,s3; 2076 2077 PetscFunctionBegin; 2078 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2079 if (rr) { 2080 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2081 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2082 /* Overlap communication with computation. 
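       The scatter of the right scaling vector rr into the ghosted work vector aij->lvec is
       started here; the left scaling of the off-diagonal block and the scaling of the
       diagonal block proceed while the scatter is in flight, and it is completed only when
       the off-diagonal block needs the ghost values of rr.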
*/ 2083 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 } 2085 if (ll) { 2086 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2087 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2088 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2089 } 2090 /* scale the diagonal block */ 2091 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2092 2093 if (rr) { 2094 /* Do a scatter end and then right scale the off-diagonal block */ 2095 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2096 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2097 } 2098 PetscFunctionReturn(0); 2099 } 2100 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2103 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2104 { 2105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2106 PetscErrorCode ierr; 2107 2108 PetscFunctionBegin; 2109 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2110 PetscFunctionReturn(0); 2111 } 2112 2113 #undef __FUNCT__ 2114 #define __FUNCT__ "MatEqual_MPIAIJ" 2115 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2116 { 2117 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2118 Mat a,b,c,d; 2119 PetscBool flg; 2120 PetscErrorCode ierr; 2121 2122 PetscFunctionBegin; 2123 a = matA->A; b = matA->B; 2124 c = matB->A; d = matB->B; 2125 2126 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2127 if (flg) { 2128 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2129 } 2130 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatCopy_MPIAIJ" 2136 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2137 { 2138 PetscErrorCode ierr; 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2140 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2141 2142 PetscFunctionBegin; 2143 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2144 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2145 /* because of the column compression in the off-processor part of the matrix a->B, 2146 the number of columns in a->B and b->B may be different, hence we cannot call 2147 the MatCopy() directly on the two parts. 
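       (The off-process block stores only those columns that contain a nonzero on this
       process, renumbered contiguously and mapped back to global indices through garray;
       matrices with different off-process nonzero patterns therefore have B blocks with
       different column counts and cannot be copied entry-for-entry.)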
If need be, we can provide a more 2148 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2149 then copying the submatrices */ 2150 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2151 } else { 2152 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2153 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2154 } 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatSetUp_MPIAIJ" 2160 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2161 { 2162 PetscErrorCode ierr; 2163 2164 PetscFunctionBegin; 2165 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2171 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2172 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2173 { 2174 PetscInt i,m=Y->rmap->N; 2175 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2176 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2177 const PetscInt *xi = x->i,*yi = y->i; 2178 2179 PetscFunctionBegin; 2180 /* Set the number of nonzeros in the new matrix */ 2181 for (i=0; i<m; i++) { 2182 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2183 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2184 nnz[i] = 0; 2185 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2186 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2187 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2188 nnz[i]++; 2189 } 2190 for (; k<nzy; k++) nnz[i]++; 2191 } 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatAXPY_MPIAIJ" 2197 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2198 { 2199 PetscErrorCode ierr; 2200 PetscInt i; 2201 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2202 PetscBLASInt bnz,one=1; 2203 Mat_SeqAIJ *x,*y; 2204 2205 PetscFunctionBegin; 2206 if (str == SAME_NONZERO_PATTERN) { 2207 PetscScalar alpha = a; 2208 x = (Mat_SeqAIJ*)xx->A->data; 2209 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2210 y = (Mat_SeqAIJ*)yy->A->data; 2211 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2212 x = (Mat_SeqAIJ*)xx->B->data; 2213 y = (Mat_SeqAIJ*)yy->B->data; 2214 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2215 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2216 } else if (str == SUBSET_NONZERO_PATTERN) { 2217 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2218 2219 x = (Mat_SeqAIJ*)xx->B->data; 2220 y = (Mat_SeqAIJ*)yy->B->data; 2221 if (y->xtoy && y->XtoY != xx->B) { 2222 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2223 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2224 } 2225 if (!y->xtoy) { /* get xtoy */ 2226 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2227 y->XtoY = xx->B; 2228 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2229 } 2230 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2231 } else { 2232 Mat B; 2233 PetscInt *nnz_d,*nnz_o; 2234 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2235 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2236 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2237 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2238 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2239 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2240 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2241 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2242 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2243 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2244 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2245 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2246 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2247 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2248 } 2249 PetscFunctionReturn(0); 2250 } 2251 2252 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2253 2254 #undef __FUNCT__ 2255 #define __FUNCT__ "MatConjugate_MPIAIJ" 2256 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2257 { 2258 #if defined(PETSC_USE_COMPLEX) 2259 PetscErrorCode ierr; 2260 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2261 2262 PetscFunctionBegin; 2263 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2264 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2265 #else 2266 PetscFunctionBegin; 2267 #endif 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatRealPart_MPIAIJ" 2273 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2274 { 2275 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2276 PetscErrorCode ierr; 2277 2278 PetscFunctionBegin; 2279 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2280 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2281 PetscFunctionReturn(0); 2282 } 2283 2284 #undef __FUNCT__ 2285 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2286 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2287 { 2288 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2289 PetscErrorCode ierr; 2290 2291 PetscFunctionBegin; 2292 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2293 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2294 PetscFunctionReturn(0); 2295 } 2296 2297 #if defined(PETSC_HAVE_PBGL) 2298 2299 #include <boost/parallel/mpi/bsp_process_group.hpp> 2300 #include <boost/graph/distributed/ilu_default_graph.hpp> 2301 #include <boost/graph/distributed/ilu_0_block.hpp> 2302 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2303 #include <boost/graph/distributed/petsc/interface.hpp> 2304 #include <boost/multi_array.hpp> 2305 #include <boost/parallel/distributed_property_map->hpp> 2306 2307 #undef __FUNCT__ 2308 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2309 /* 2310 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2311 */ 2312 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2313 { 2314 namespace petsc = boost::distributed::petsc; 2315 2316 namespace graph_dist = boost::graph::distributed; 2317 using boost::graph::distributed::ilu_default::process_group_type; 2318 using boost::graph::ilu_permuted; 2319 2320 PetscBool row_identity, col_identity; 2321 PetscContainer c; 2322 PetscInt m, n, M, N; 2323 PetscErrorCode ierr; 2324 2325 PetscFunctionBegin; 2326 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2327 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2328 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2329 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2330 2331 process_group_type pg; 2332 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2333 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2334 lgraph_type& level_graph = *lgraph_p; 2335 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2336 2337 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2338 ilu_permuted(level_graph); 2339 2340 /* put together the new matrix */ 2341 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2342 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2343 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2344 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2345 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2346 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2347 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2348 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2349 2350 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2351 ierr = PetscContainerSetPointer(c, lgraph_p); 2352 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2353 ierr = PetscContainerDestroy(&c); 2354 PetscFunctionReturn(0); 2355 } 2356 2357 #undef __FUNCT__ 2358 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2359 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2360 { 2361 PetscFunctionBegin; 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatSolve_MPIAIJ" 2367 /* 2368 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2369 */ 2370 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2371 { 2372 namespace graph_dist = boost::graph::distributed; 2373 2374 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2375 lgraph_type *lgraph_p; 2376 PetscContainer c; 2377 PetscErrorCode ierr; 2378 2379 PetscFunctionBegin; 2380 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2381 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2382 ierr = VecCopy(b, x);CHKERRQ(ierr); 2383 2384 PetscScalar *array_x; 2385 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2386 PetscInt sx; 2387 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2388 2389 PetscScalar *array_b; 2390 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2391 PetscInt sb; 2392 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2393 2394 lgraph_type& level_graph = *lgraph_p; 2395 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2396 2397 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2398 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2399 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2400 2401 typedef boost::iterator_property_map<array_ref_type::iterator, 2402 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2403 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2404 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2405 2406 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2407 PetscFunctionReturn(0); 2408 } 2409 #endif 2410 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "MatDestroy_MatRedundant" 2413 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2414 { 2415 PetscErrorCode ierr; 2416 Mat_Redundant *redund; 2417 PetscInt i; 2418 PetscMPIInt size; 2419 2420 PetscFunctionBegin; 2421 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2422 if (size == 1) { 2423 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2424 redund = 
a->redundant; 2425 } else { 2426 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2427 redund = a->redundant; 2428 } 2429 if (redund){ 2430 if (redund->matseq) { /* via MatGetSubMatrices() */ 2431 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2432 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2433 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2434 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2435 } else { 2436 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2437 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2438 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2439 for (i=0; i<redund->nrecvs; i++) { 2440 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2441 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2442 } 2443 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2444 } 2445 2446 if (redund->psubcomm) { 2447 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2448 } 2449 ierr = redund->Destroy(A);CHKERRQ(ierr); 2450 ierr = PetscFree(redund);CHKERRQ(ierr); 2451 } 2452 PetscFunctionReturn(0); 2453 } 2454 2455 #undef __FUNCT__ 2456 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2457 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2458 { 2459 PetscMPIInt rank,size; 2460 MPI_Comm comm; 2461 PetscErrorCode ierr; 2462 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2463 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2464 PetscInt *rowrange = mat->rmap->range; 2465 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2466 Mat A = aij->A,B=aij->B,C=*matredundant; 2467 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2468 PetscScalar *sbuf_a; 2469 PetscInt nzlocal=a->nz+b->nz; 2470 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2471 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2472 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2473 MatScalar *aworkA,*aworkB; 2474 PetscScalar *vals; 2475 PetscMPIInt tag1,tag2,tag3,imdex; 2476 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2477 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2478 MPI_Status recv_status,*send_status; 2479 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2480 PetscInt **rbuf_j=NULL; 2481 PetscScalar **rbuf_a=NULL; 2482 Mat_Redundant *redund =NULL; 2483 2484 PetscFunctionBegin; 2485 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2486 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2487 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2488 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2489 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2490 2491 if (reuse == MAT_REUSE_MATRIX) { 2492 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2493 if (subsize == 1) { 2494 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2495 redund = c->redundant; 2496 } else { 2497 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2498 redund = c->redundant; 2499 } 2500 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2501 2502 nsends = redund->nsends; 2503 nrecvs = redund->nrecvs; 2504 send_rank = redund->send_rank; 2505 recv_rank = redund->recv_rank; 2506 sbuf_nz = redund->sbuf_nz; 2507 rbuf_nz = redund->rbuf_nz; 2508 sbuf_j = redund->sbuf_j; 2509 sbuf_a = redund->sbuf_a; 2510 rbuf_j = redund->rbuf_j; 2511 rbuf_a = redund->rbuf_a; 2512 } 2513 2514 if (reuse == MAT_INITIAL_MATRIX) { 2515 PetscInt nleftover,np_subcomm; 2516 2517 /* get the destination processors' id send_rank, nsends and nrecvs */ 2518 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2519 2520 np_subcomm = size/nsubcomm; 2521 nleftover = size - nsubcomm*np_subcomm; 2522 2523 /* block of codes below is specific for INTERLACED */ 2524 /* ------------------------------------------------*/ 2525 nsends = 0; nrecvs = 0; 2526 for (i=0; i<size; i++) { 2527 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2528 send_rank[nsends++] = i; 2529 recv_rank[nrecvs++] = i; 2530 } 2531 } 2532 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2533 i = size-nleftover-1; 2534 j = 0; 2535 while (j < nsubcomm - nleftover) { 2536 send_rank[nsends++] = i; 2537 i--; j++; 2538 } 2539 } 2540 2541 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2542 for (i=0; i<nleftover; i++) { 2543 recv_rank[nrecvs++] = size-nleftover+i; 2544 } 2545 } 2546 /*----------------------------------------------*/ 2547 2548 /* allocate sbuf_j, sbuf_a */ 2549 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2550 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2551 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2552 /* 2553 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2554 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2555 */ 2556 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2557 2558 /* copy mat's local entries into the buffers */ 2559 if (reuse == MAT_INITIAL_MATRIX) { 2560 rownz_max = 0; 2561 rptr = sbuf_j; 2562 cols = sbuf_j + rend-rstart + 1; 2563 vals = sbuf_a; 2564 rptr[0] = 0; 2565 for (i=0; i<rend-rstart; i++) { 2566 row = i + rstart; 2567 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2568 ncols = nzA + nzB; 2569 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2570 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2571 /* load the column indices for this row into cols */ 2572 lwrite = 0; 2573 for (l=0; l<nzB; l++) { 2574 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2575 vals[lwrite] = aworkB[l]; 2576 cols[lwrite++] = ctmp; 2577 } 2578 } 2579 for (l=0; l<nzA; l++) { 2580 vals[lwrite] = aworkA[l]; 2581 cols[lwrite++] = cstart + cworkA[l]; 2582 } 2583 for (l=0; l<nzB; l++) { 2584 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2585 vals[lwrite] = aworkB[l]; 2586 cols[lwrite++] = ctmp; 2587 } 2588 } 2589 vals += ncols; 2590 cols += ncols; 2591 rptr[i+1] = rptr[i] + ncols; 2592 if (rownz_max < ncols) rownz_max = ncols; 2593 } 2594 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2595 } else { /* only copy matrix values into sbuf_a */ 2596 rptr = sbuf_j; 2597 vals = sbuf_a; 2598 rptr[0] = 0; 2599 for (i=0; i<rend-rstart; i++) { 2600 row = i + rstart; 2601 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2602 ncols = nzA + nzB; 2603 cworkB = b->j + b->i[i]; 2604 aworkA = a->a + a->i[i]; 2605 aworkB = b->a + b->i[i]; 2606 lwrite = 0; 2607 for (l=0; 
l<nzB; l++) { 2608 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2609 } 2610 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2611 for (l=0; l<nzB; l++) { 2612 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2613 } 2614 vals += ncols; 2615 rptr[i+1] = rptr[i] + ncols; 2616 } 2617 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2618 2619 /* send nzlocal to others, and recv other's nzlocal */ 2620 /*--------------------------------------------------*/ 2621 if (reuse == MAT_INITIAL_MATRIX) { 2622 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2623 2624 s_waits2 = s_waits3 + nsends; 2625 s_waits1 = s_waits2 + nsends; 2626 r_waits1 = s_waits1 + nsends; 2627 r_waits2 = r_waits1 + nrecvs; 2628 r_waits3 = r_waits2 + nrecvs; 2629 } else { 2630 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2631 2632 r_waits3 = s_waits3 + nsends; 2633 } 2634 2635 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2636 if (reuse == MAT_INITIAL_MATRIX) { 2637 /* get new tags to keep the communication clean */ 2638 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2639 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2640 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2641 2642 /* post receives of other's nzlocal */ 2643 for (i=0; i<nrecvs; i++) { 2644 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2645 } 2646 /* send nzlocal to others */ 2647 for (i=0; i<nsends; i++) { 2648 sbuf_nz[i] = nzlocal; 2649 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2650 } 2651 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2652 count = nrecvs; 2653 while (count) { 2654 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2655 2656 recv_rank[imdex] = recv_status.MPI_SOURCE; 2657 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2658 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2659 2660 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2661 2662 rbuf_nz[imdex] += i + 2; 2663 2664 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2665 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2666 count--; 2667 } 2668 /* wait on sends of nzlocal */ 2669 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2670 /* send mat->i,j to others, and recv from other's */ 2671 /*------------------------------------------------*/ 2672 for (i=0; i<nsends; i++) { 2673 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2674 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2675 } 2676 /* wait on receives of mat->i,j */ 2677 /*------------------------------*/ 2678 count = nrecvs; 2679 while (count) { 2680 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2681 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2682 count--; 2683 } 2684 /* wait on sends of mat->i,j */ 2685 /*---------------------------*/ 2686 if (nsends) { 2687 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2688 } 2689 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2690 2691 /* post 
receives, send and receive mat->a */ 2692 /*----------------------------------------*/ 2693 for (imdex=0; imdex<nrecvs; imdex++) { 2694 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2695 } 2696 for (i=0; i<nsends; i++) { 2697 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2698 } 2699 count = nrecvs; 2700 while (count) { 2701 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2702 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2703 count--; 2704 } 2705 if (nsends) { 2706 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2707 } 2708 2709 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2710 2711 /* create redundant matrix */ 2712 /*-------------------------*/ 2713 if (reuse == MAT_INITIAL_MATRIX) { 2714 const PetscInt *range; 2715 PetscInt rstart_sub,rend_sub,mloc_sub; 2716 2717 /* compute rownz_max for preallocation */ 2718 for (imdex=0; imdex<nrecvs; imdex++) { 2719 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2720 rptr = rbuf_j[imdex]; 2721 for (i=0; i<j; i++) { 2722 ncols = rptr[i+1] - rptr[i]; 2723 if (rownz_max < ncols) rownz_max = ncols; 2724 } 2725 } 2726 2727 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2728 2729 /* get local size of redundant matrix 2730 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2731 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2732 rstart_sub = range[nsubcomm*subrank]; 2733 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2734 rend_sub = range[nsubcomm*(subrank+1)]; 2735 } else { 2736 rend_sub = mat->rmap->N; 2737 } 2738 mloc_sub = rend_sub - rstart_sub; 2739 2740 if (M == N) { 2741 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2742 } else { /* non-square matrix */ 2743 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2744 } 2745 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2746 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2747 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2748 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2749 } else { 2750 C = *matredundant; 2751 } 2752 2753 /* insert local matrix entries */ 2754 rptr = sbuf_j; 2755 cols = sbuf_j + rend-rstart + 1; 2756 vals = sbuf_a; 2757 for (i=0; i<rend-rstart; i++) { 2758 row = i + rstart; 2759 ncols = rptr[i+1] - rptr[i]; 2760 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2761 vals += ncols; 2762 cols += ncols; 2763 } 2764 /* insert received matrix entries */ 2765 for (imdex=0; imdex<nrecvs; imdex++) { 2766 rstart = rowrange[recv_rank[imdex]]; 2767 rend = rowrange[recv_rank[imdex]+1]; 2768 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2769 rptr = rbuf_j[imdex]; 2770 cols = rbuf_j[imdex] + rend-rstart + 1; 2771 vals = rbuf_a[imdex]; 2772 for (i=0; i<rend-rstart; i++) { 2773 row = i + rstart; 2774 ncols = rptr[i+1] - rptr[i]; 2775 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2776 vals += ncols; 2777 cols += ncols; 2778 } 2779 } 2780 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2781 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2782 2783 if (reuse == MAT_INITIAL_MATRIX) { 2784 *matredundant = C; 2785 2786 /* 
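      To let a later call with MAT_REUSE_MATRIX skip the communication setup above,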
create a supporting struct and attach it to C for reuse */ 2787 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2788 if (subsize == 1) { 2789 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2790 c->redundant = redund; 2791 } else { 2792 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2793 c->redundant = redund; 2794 } 2795 2796 redund->nzlocal = nzlocal; 2797 redund->nsends = nsends; 2798 redund->nrecvs = nrecvs; 2799 redund->send_rank = send_rank; 2800 redund->recv_rank = recv_rank; 2801 redund->sbuf_nz = sbuf_nz; 2802 redund->rbuf_nz = rbuf_nz; 2803 redund->sbuf_j = sbuf_j; 2804 redund->sbuf_a = sbuf_a; 2805 redund->rbuf_j = rbuf_j; 2806 redund->rbuf_a = rbuf_a; 2807 redund->psubcomm = NULL; 2808 2809 redund->Destroy = C->ops->destroy; 2810 C->ops->destroy = MatDestroy_MatRedundant; 2811 } 2812 PetscFunctionReturn(0); 2813 } 2814 2815 #undef __FUNCT__ 2816 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2817 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2818 { 2819 PetscErrorCode ierr; 2820 MPI_Comm comm; 2821 PetscMPIInt size,subsize; 2822 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2823 Mat_Redundant *redund=NULL; 2824 PetscSubcomm psubcomm=NULL; 2825 MPI_Comm subcomm_in=subcomm; 2826 Mat *matseq; 2827 IS isrow,iscol; 2828 2829 PetscFunctionBegin; 2830 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2831 if (reuse == MAT_INITIAL_MATRIX) { 2832 /* create psubcomm, then get subcomm */ 2833 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2834 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2835 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2836 2837 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2838 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2839 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2840 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2841 subcomm = psubcomm->comm; 2842 } else { /* retrieve psubcomm and subcomm */ 2843 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2844 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2845 if (subsize == 1) { 2846 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2847 redund = c->redundant; 2848 } else { 2849 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2850 redund = c->redundant; 2851 } 2852 psubcomm = redund->psubcomm; 2853 } 2854 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2855 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2856 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2857 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2858 if (subsize == 1) { 2859 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2860 c->redundant->psubcomm = psubcomm; 2861 } else { 2862 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2863 c->redundant->psubcomm = psubcomm ; 2864 } 2865 } 2866 PetscFunctionReturn(0); 2867 } 2868 } 2869 2870 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2871 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2872 if (reuse == MAT_INITIAL_MATRIX) { 2873 /* create a local sequential matrix matseq[0] */ 2874 mloc_sub = PETSC_DECIDE; 2875 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2876 ierr = 
MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2877 rstart = rend - mloc_sub; 2878 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2879 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2880 } else { /* reuse == MAT_REUSE_MATRIX */ 2881 if (subsize == 1) { 2882 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2883 redund = c->redundant; 2884 } else { 2885 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2886 redund = c->redundant; 2887 } 2888 2889 isrow = redund->isrow; 2890 iscol = redund->iscol; 2891 matseq = redund->matseq; 2892 } 2893 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2894 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2895 2896 if (reuse == MAT_INITIAL_MATRIX) { 2897 /* create a supporting struct and attach it to C for reuse */ 2898 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2899 if (subsize == 1) { 2900 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2901 c->redundant = redund; 2902 } else { 2903 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2904 c->redundant = redund; 2905 } 2906 redund->isrow = isrow; 2907 redund->iscol = iscol; 2908 redund->matseq = matseq; 2909 redund->psubcomm = psubcomm; 2910 redund->Destroy = (*matredundant)->ops->destroy; 2911 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2912 } 2913 PetscFunctionReturn(0); 2914 } 2915 2916 #undef __FUNCT__ 2917 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2918 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2919 { 2920 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2921 PetscErrorCode ierr; 2922 PetscInt i,*idxb = 0; 2923 PetscScalar *va,*vb; 2924 Vec vtmp; 2925 2926 PetscFunctionBegin; 2927 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2928 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2929 if (idx) { 2930 for (i=0; i<A->rmap->n; i++) { 2931 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2932 } 2933 } 2934 2935 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2936 if (idx) { 2937 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2938 } 2939 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2940 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2941 2942 for (i=0; i<A->rmap->n; i++) { 2943 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2944 va[i] = vb[i]; 2945 if (idx) idx[i] = a->garray[idxb[i]]; 2946 } 2947 } 2948 2949 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2950 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2951 ierr = PetscFree(idxb);CHKERRQ(ierr); 2952 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2953 PetscFunctionReturn(0); 2954 } 2955 2956 #undef __FUNCT__ 2957 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2958 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2959 { 2960 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2961 PetscErrorCode ierr; 2962 PetscInt i,*idxb = 0; 2963 PetscScalar *va,*vb; 2964 Vec vtmp; 2965 2966 PetscFunctionBegin; 2967 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2968 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2969 if (idx) { 2970 for (i=0; i<A->cmap->n; i++) { 2971 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2972 } 2973 } 2974 2975 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2976 if (idx) { 2977 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2978 } 2979 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2980 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2981 2982 for (i=0; i<A->rmap->n; i++) 
{ 2983 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2984 va[i] = vb[i]; 2985 if (idx) idx[i] = a->garray[idxb[i]]; 2986 } 2987 } 2988 2989 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2990 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2991 ierr = PetscFree(idxb);CHKERRQ(ierr); 2992 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2993 PetscFunctionReturn(0); 2994 } 2995 2996 #undef __FUNCT__ 2997 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2998 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2999 { 3000 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3001 PetscInt n = A->rmap->n; 3002 PetscInt cstart = A->cmap->rstart; 3003 PetscInt *cmap = mat->garray; 3004 PetscInt *diagIdx, *offdiagIdx; 3005 Vec diagV, offdiagV; 3006 PetscScalar *a, *diagA, *offdiagA; 3007 PetscInt r; 3008 PetscErrorCode ierr; 3009 3010 PetscFunctionBegin; 3011 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 3012 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 3013 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 3014 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3015 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3016 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3017 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3018 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3019 for (r = 0; r < n; ++r) { 3020 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3021 a[r] = diagA[r]; 3022 idx[r] = cstart + diagIdx[r]; 3023 } else { 3024 a[r] = offdiagA[r]; 3025 idx[r] = cmap[offdiagIdx[r]]; 3026 } 3027 } 3028 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3029 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3030 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3031 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3032 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3033 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3034 PetscFunctionReturn(0); 3035 } 3036 3037 #undef __FUNCT__ 3038 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3039 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3040 { 3041 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3042 PetscInt n = A->rmap->n; 3043 PetscInt cstart = A->cmap->rstart; 3044 PetscInt *cmap = mat->garray; 3045 PetscInt *diagIdx, *offdiagIdx; 3046 Vec diagV, offdiagV; 3047 PetscScalar *a, *diagA, *offdiagA; 3048 PetscInt r; 3049 PetscErrorCode ierr; 3050 3051 PetscFunctionBegin; 3052 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 3053 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3054 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3055 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3056 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3057 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3058 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3059 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3060 for (r = 0; r < n; ++r) { 3061 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3062 a[r] = diagA[r]; 3063 idx[r] = cstart + diagIdx[r]; 3064 } else { 3065 a[r] = offdiagA[r]; 3066 idx[r] = cmap[offdiagIdx[r]]; 3067 } 3068 } 3069 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3070 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3071 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3072 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3073 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3074 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3075 
PetscFunctionReturn(0); 3076 } 3077 3078 #undef __FUNCT__ 3079 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3080 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3081 { 3082 PetscErrorCode ierr; 3083 Mat *dummy; 3084 3085 PetscFunctionBegin; 3086 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3087 *newmat = *dummy; 3088 ierr = PetscFree(dummy);CHKERRQ(ierr); 3089 PetscFunctionReturn(0); 3090 } 3091 3092 #undef __FUNCT__ 3093 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3094 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3095 { 3096 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3097 PetscErrorCode ierr; 3098 3099 PetscFunctionBegin; 3100 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3101 PetscFunctionReturn(0); 3102 } 3103 3104 #undef __FUNCT__ 3105 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3106 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3107 { 3108 PetscErrorCode ierr; 3109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3110 3111 PetscFunctionBegin; 3112 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3113 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3114 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3115 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3116 PetscFunctionReturn(0); 3117 } 3118 3119 /* -------------------------------------------------------------------*/ 3120 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3121 MatGetRow_MPIAIJ, 3122 MatRestoreRow_MPIAIJ, 3123 MatMult_MPIAIJ, 3124 /* 4*/ MatMultAdd_MPIAIJ, 3125 MatMultTranspose_MPIAIJ, 3126 MatMultTransposeAdd_MPIAIJ, 3127 #if defined(PETSC_HAVE_PBGL) 3128 MatSolve_MPIAIJ, 3129 #else 3130 0, 3131 #endif 3132 0, 3133 0, 3134 /*10*/ 0, 3135 0, 3136 0, 3137 MatSOR_MPIAIJ, 3138 MatTranspose_MPIAIJ, 3139 /*15*/ MatGetInfo_MPIAIJ, 3140 MatEqual_MPIAIJ, 3141 MatGetDiagonal_MPIAIJ, 3142 MatDiagonalScale_MPIAIJ, 3143 MatNorm_MPIAIJ, 3144 /*20*/ MatAssemblyBegin_MPIAIJ, 3145 MatAssemblyEnd_MPIAIJ, 3146 MatSetOption_MPIAIJ, 3147 MatZeroEntries_MPIAIJ, 3148 /*24*/ MatZeroRows_MPIAIJ, 3149 0, 3150 #if defined(PETSC_HAVE_PBGL) 3151 0, 3152 #else 3153 0, 3154 #endif 3155 0, 3156 0, 3157 /*29*/ MatSetUp_MPIAIJ, 3158 #if defined(PETSC_HAVE_PBGL) 3159 0, 3160 #else 3161 0, 3162 #endif 3163 0, 3164 0, 3165 0, 3166 /*34*/ MatDuplicate_MPIAIJ, 3167 0, 3168 0, 3169 0, 3170 0, 3171 /*39*/ MatAXPY_MPIAIJ, 3172 MatGetSubMatrices_MPIAIJ, 3173 MatIncreaseOverlap_MPIAIJ, 3174 MatGetValues_MPIAIJ, 3175 MatCopy_MPIAIJ, 3176 /*44*/ MatGetRowMax_MPIAIJ, 3177 MatScale_MPIAIJ, 3178 0, 3179 0, 3180 MatZeroRowsColumns_MPIAIJ, 3181 /*49*/ MatSetRandom_MPIAIJ, 3182 0, 3183 0, 3184 0, 3185 0, 3186 /*54*/ MatFDColoringCreate_MPIXAIJ, 3187 0, 3188 MatSetUnfactored_MPIAIJ, 3189 MatPermute_MPIAIJ, 3190 0, 3191 /*59*/ MatGetSubMatrix_MPIAIJ, 3192 MatDestroy_MPIAIJ, 3193 MatView_MPIAIJ, 3194 0, 3195 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3196 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3197 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3198 0, 3199 0, 3200 0, 3201 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3202 MatGetRowMinAbs_MPIAIJ, 3203 0, 3204 MatSetColoring_MPIAIJ, 3205 0, 3206 MatSetValuesAdifor_MPIAIJ, 3207 /*75*/ MatFDColoringApply_AIJ, 3208 0, 3209 0, 3210 0, 3211 MatFindZeroDiagonals_MPIAIJ, 3212 /*80*/ 0, 3213 0, 3214 0, 3215 /*83*/ MatLoad_MPIAIJ, 3216 0, 3217 0, 3218 0, 3219 0, 3220 0, 3221 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3222 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3223 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3224 MatPtAP_MPIAIJ_MPIAIJ, 3225 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3226 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3227 0, 3228 0, 3229 0, 3230 0, 3231 /*99*/ 0, 3232 0, 3233 0, 3234 MatConjugate_MPIAIJ, 3235 0, 3236 /*104*/MatSetValuesRow_MPIAIJ, 3237 MatRealPart_MPIAIJ, 3238 MatImaginaryPart_MPIAIJ, 3239 0, 3240 0, 3241 /*109*/0, 3242 MatGetRedundantMatrix_MPIAIJ, 3243 MatGetRowMin_MPIAIJ, 3244 0, 3245 0, 3246 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3247 0, 3248 0, 3249 0, 3250 0, 3251 /*119*/0, 3252 0, 3253 0, 3254 0, 3255 MatGetMultiProcBlock_MPIAIJ, 3256 /*124*/MatFindNonzeroRows_MPIAIJ, 3257 MatGetColumnNorms_MPIAIJ, 3258 MatInvertBlockDiagonal_MPIAIJ, 3259 0, 3260 MatGetSubMatricesParallel_MPIAIJ, 3261 /*129*/0, 3262 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3263 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3264 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3265 0, 3266 /*134*/0, 3267 0, 3268 0, 3269 0, 3270 0, 3271 /*139*/0, 3272 0, 3273 0, 3274 MatFDColoringSetUp_MPIXAIJ 3275 }; 3276 3277 /* ----------------------------------------------------------------------------------------*/ 3278 3279 #undef __FUNCT__ 3280 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3281 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3282 { 3283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3284 PetscErrorCode ierr; 3285 3286 PetscFunctionBegin; 3287 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3288 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3289 PetscFunctionReturn(0); 3290 } 3291 3292 #undef __FUNCT__ 3293 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3294 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3295 { 3296 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3297 PetscErrorCode ierr; 3298 3299 PetscFunctionBegin; 3300 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3301 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3302 PetscFunctionReturn(0); 3303 } 3304 3305 #undef __FUNCT__ 3306 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3307 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3308 { 3309 Mat_MPIAIJ *b; 3310 PetscErrorCode ierr; 3311 3312 PetscFunctionBegin; 3313 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3314 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3315 b = (Mat_MPIAIJ*)B->data; 3316 3317 if (!B->preallocated) { 3318 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3319 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3320 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3321 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3322 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3323 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3324 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3325 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3326 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3327 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3328 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3329 } 3330 3331 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3332 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3333 B->preallocated = PETSC_TRUE; 3334 PetscFunctionReturn(0); 3335 } 3336 3337 #undef __FUNCT__ 3338 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3339 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3340 { 3341 Mat mat; 3342 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3343 PetscErrorCode ierr; 3344 3345 PetscFunctionBegin; 3346 *newmat = 0; 3347 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3348 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3349 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3350 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3351 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3352 a = (Mat_MPIAIJ*)mat->data; 3353 3354 mat->factortype = matin->factortype; 3355 mat->rmap->bs = matin->rmap->bs; 3356 mat->cmap->bs = matin->cmap->bs; 3357 mat->assembled = PETSC_TRUE; 3358 mat->insertmode = NOT_SET_VALUES; 3359 mat->preallocated = PETSC_TRUE; 3360 3361 a->size = oldmat->size; 3362 a->rank = oldmat->rank; 3363 a->donotstash = oldmat->donotstash; 3364 a->roworiented = oldmat->roworiented; 3365 a->rowindices = 0; 3366 a->rowvalues = 0; 3367 a->getrowactive = PETSC_FALSE; 3368 3369 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3370 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3371 3372 if (oldmat->colmap) { 3373 #if defined(PETSC_USE_CTABLE) 3374 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3375 #else 3376 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3377 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3378 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3379 #endif 3380 } else a->colmap = 0; 3381 if (oldmat->garray) { 3382 PetscInt len; 3383 len = oldmat->B->cmap->n; 3384 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3385 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3386 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3387 } else a->garray = 0; 3388 3389 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3390 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3391 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3392 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3393 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3394 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3395 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3396 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3397 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3398 *newmat = mat; 3399 PetscFunctionReturn(0); 3400 } 3401 3402 3403 3404 #undef __FUNCT__ 3405 #define __FUNCT__ "MatLoad_MPIAIJ" 3406 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3407 { 3408 PetscScalar *vals,*svals; 3409 MPI_Comm comm; 3410 PetscErrorCode ierr; 3411 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3412 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3413 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3414 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3415 PetscInt cend,cstart,n,*rowners,sizesset=1; 3416 int fd; 3417 PetscInt bs = 1; 3418 3419 PetscFunctionBegin; 3420 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3421 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3422 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3423 if (!rank) { 3424 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3425 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3426 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3427 } 3428 3429 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3430 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3431 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3432 3433 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3434 3435 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3436 M = header[1]; N = header[2]; 3437 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3438 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3439 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3440 3441 /* If global sizes are set, check if they are consistent with that given in the file */ 3442 if (sizesset) { 3443 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3444 } 3445 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3446 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3447 3448 /* determine ownership of all (block) rows */ 3449 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3450 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3451 else m = newMat->rmap->n; /* Set by user */ 3452 3453 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3454 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3455 3456 /* First process needs enough room for process with most rows */ 3457 if (!rank) { 3458 mmax = rowners[1]; 3459 for (i=2; i<=size; i++) { 3460 mmax = PetscMax(mmax, rowners[i]); 3461 } 3462 } else mmax = -1; /* unused, but compilers complain */ 3463 3464 rowners[0] = 0; 3465 for (i=2; i<=size; i++) { 3466 rowners[i] += rowners[i-1]; 3467 } 3468 
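  /* rowners[] now holds a prefix sum of the per-process row counts: rowners[p] is the first global row owned by process p, and rowners[size] is the total number of rows */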
rstart = rowners[rank]; 3469 rend = rowners[rank+1]; 3470 3471 /* distribute row lengths to all processors */ 3472 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3473 if (!rank) { 3474 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3475 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3476 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3477 for (j=0; j<m; j++) { 3478 procsnz[0] += ourlens[j]; 3479 } 3480 for (i=1; i<size; i++) { 3481 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3482 /* calculate the number of nonzeros on each processor */ 3483 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3484 procsnz[i] += rowlengths[j]; 3485 } 3486 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3487 } 3488 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3489 } else { 3490 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3491 } 3492 3493 if (!rank) { 3494 /* determine max buffer needed and allocate it */ 3495 maxnz = 0; 3496 for (i=0; i<size; i++) { 3497 maxnz = PetscMax(maxnz,procsnz[i]); 3498 } 3499 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3500 3501 /* read in my part of the matrix column indices */ 3502 nz = procsnz[0]; 3503 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3504 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3505 3506 /* read in every one elses and ship off */ 3507 for (i=1; i<size; i++) { 3508 nz = procsnz[i]; 3509 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3510 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3511 } 3512 ierr = PetscFree(cols);CHKERRQ(ierr); 3513 } else { 3514 /* determine buffer space needed for message */ 3515 nz = 0; 3516 for (i=0; i<m; i++) { 3517 nz += ourlens[i]; 3518 } 3519 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3520 3521 /* receive message of column indices*/ 3522 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3523 } 3524 3525 /* determine column ownership if matrix is not square */ 3526 if (N != M) { 3527 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3528 else n = newMat->cmap->n; 3529 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3530 cstart = cend - n; 3531 } else { 3532 cstart = rstart; 3533 cend = rend; 3534 n = cend - cstart; 3535 } 3536 3537 /* loop over local rows, determining number of off diagonal entries */ 3538 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3539 jj = 0; 3540 for (i=0; i<m; i++) { 3541 for (j=0; j<ourlens[i]; j++) { 3542 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3543 jj++; 3544 } 3545 } 3546 3547 for (i=0; i<m; i++) { 3548 ourlens[i] -= offlens[i]; 3549 } 3550 if (!sizesset) { 3551 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3552 } 3553 3554 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3555 3556 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3557 3558 for (i=0; i<m; i++) { 3559 ourlens[i] += offlens[i]; 3560 } 3561 3562 if (!rank) { 3563 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3564 3565 /* read in my part of the matrix numerical values */ 3566 nz = procsnz[0]; 3567 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3568 3569 /* insert into matrix */ 3570 jj = rstart; 3571 smycols = mycols; 3572 svals = vals; 3573 for (i=0; i<m; i++) { 3574 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3575 smycols += ourlens[i]; 3576 svals += 
ourlens[i]; 3577 jj++; 3578 } 3579 3580 /* read in other processors and ship out */ 3581 for (i=1; i<size; i++) { 3582 nz = procsnz[i]; 3583 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3584 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3585 } 3586 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3587 } else { 3588 /* receive numeric values */ 3589 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3590 3591 /* receive message of values*/ 3592 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3593 3594 /* insert into matrix */ 3595 jj = rstart; 3596 smycols = mycols; 3597 svals = vals; 3598 for (i=0; i<m; i++) { 3599 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3600 smycols += ourlens[i]; 3601 svals += ourlens[i]; 3602 jj++; 3603 } 3604 } 3605 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3606 ierr = PetscFree(vals);CHKERRQ(ierr); 3607 ierr = PetscFree(mycols);CHKERRQ(ierr); 3608 ierr = PetscFree(rowners);CHKERRQ(ierr); 3609 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3610 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3611 PetscFunctionReturn(0); 3612 } 3613 3614 #undef __FUNCT__ 3615 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3616 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3617 { 3618 PetscErrorCode ierr; 3619 IS iscol_local; 3620 PetscInt csize; 3621 3622 PetscFunctionBegin; 3623 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3624 if (call == MAT_REUSE_MATRIX) { 3625 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3626 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3627 } else { 3628 PetscInt cbs; 3629 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3630 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3631 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3632 } 3633 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3634 if (call == MAT_INITIAL_MATRIX) { 3635 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3636 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3637 } 3638 PetscFunctionReturn(0); 3639 } 3640 3641 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3642 #undef __FUNCT__ 3643 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3644 /* 3645 Not great since it makes two copies of the submatrix, first an SeqAIJ 3646 in local and then by concatenating the local matrices the end result. 3647 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3648 3649 Note: This requires a sequential iscol with all indices. 
3650 */ 3651 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3652 { 3653 PetscErrorCode ierr; 3654 PetscMPIInt rank,size; 3655 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3656 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3657 PetscBool allcolumns, colflag; 3658 Mat M,Mreuse; 3659 MatScalar *vwork,*aa; 3660 MPI_Comm comm; 3661 Mat_SeqAIJ *aij; 3662 3663 PetscFunctionBegin; 3664 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3665 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3666 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3667 3668 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3669 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3670 if (colflag && ncol == mat->cmap->N) { 3671 allcolumns = PETSC_TRUE; 3672 } else { 3673 allcolumns = PETSC_FALSE; 3674 } 3675 if (call == MAT_REUSE_MATRIX) { 3676 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3677 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3678 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3679 } else { 3680 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3681 } 3682 3683 /* 3684 m - number of local rows 3685 n - number of columns (same on all processors) 3686 rstart - first row in new global matrix generated 3687 */ 3688 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3689 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3690 if (call == MAT_INITIAL_MATRIX) { 3691 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3692 ii = aij->i; 3693 jj = aij->j; 3694 3695 /* 3696 Determine the number of non-zeros in the diagonal and off-diagonal 3697 portions of the matrix in order to do correct preallocation 3698 */ 3699 3700 /* first get start and end of "diagonal" columns */ 3701 if (csize == PETSC_DECIDE) { 3702 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3703 if (mglobal == n) { /* square matrix */ 3704 nlocal = m; 3705 } else { 3706 nlocal = n/size + ((n % size) > rank); 3707 } 3708 } else { 3709 nlocal = csize; 3710 } 3711 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3712 rstart = rend - nlocal; 3713 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3714 3715 /* next, compute all the lengths */ 3716 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3717 olens = dlens + m; 3718 for (i=0; i<m; i++) { 3719 jend = ii[i+1] - ii[i]; 3720 olen = 0; 3721 dlen = 0; 3722 for (j=0; j<jend; j++) { 3723 if (*jj < rstart || *jj >= rend) olen++; 3724 else dlen++; 3725 jj++; 3726 } 3727 olens[i] = olen; 3728 dlens[i] = dlen; 3729 } 3730 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3731 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3732 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3733 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3734 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3735 ierr = PetscFree(dlens);CHKERRQ(ierr); 3736 } else { 3737 PetscInt ml,nl; 3738 3739 M = *newmat; 3740 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3741 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3742 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3743 /* 3744 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3745 rather than the slower MatSetValues(). 3746 */ 3747 M->was_assembled = PETSC_TRUE; 3748 M->assembled = PETSC_FALSE; 3749 } 3750 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3751 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3752 ii = aij->i; 3753 jj = aij->j; 3754 aa = aij->a; 3755 for (i=0; i<m; i++) { 3756 row = rstart + i; 3757 nz = ii[i+1] - ii[i]; 3758 cwork = jj; jj += nz; 3759 vwork = aa; aa += nz; 3760 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3761 } 3762 3763 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3764 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3765 *newmat = M; 3766 3767 /* save submatrix used in processor for next request */ 3768 if (call == MAT_INITIAL_MATRIX) { 3769 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3770 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3771 } 3772 PetscFunctionReturn(0); 3773 } 3774 3775 #undef __FUNCT__ 3776 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3777 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3778 { 3779 PetscInt m,cstart, cend,j,nnz,i,d; 3780 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3781 const PetscInt *JJ; 3782 PetscScalar *values; 3783 PetscErrorCode ierr; 3784 3785 PetscFunctionBegin; 3786 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3787 3788 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3789 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3790 m = B->rmap->n; 3791 cstart = B->cmap->rstart; 3792 cend = B->cmap->rend; 3793 rstart = B->rmap->rstart; 3794 3795 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3796 3797 #if defined(PETSC_USE_DEBUG) 3798 for (i=0; i<m; i++) { 3799 nnz = Ii[i+1]- Ii[i]; 3800 JJ = J + Ii[i]; 3801 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3802 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3803 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3804 } 3805 #endif 3806 3807 for (i=0; i<m; i++) { 3808 nnz = Ii[i+1]- Ii[i]; 3809 JJ = J + Ii[i]; 3810 nnz_max = PetscMax(nnz_max,nnz); 3811 d = 0; 3812 for (j=0; j<nnz; j++) { 3813 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3814 } 3815 d_nnz[i] = d; 3816 o_nnz[i] = nnz - d; 3817 } 3818 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3819 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3820 3821 if (v) values = (PetscScalar*)v; 3822 else { 3823 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3824 } 3825 3826 for (i=0; i<m; i++) { 3827 ii = i + rstart; 3828 nnz = Ii[i+1]- Ii[i]; 3829 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3830 } 3831 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3832 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3833 3834 if (!v) { 3835 ierr = PetscFree(values);CHKERRQ(ierr); 3836 } 3837 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3838 PetscFunctionReturn(0); 3839 } 3840 3841 #undef __FUNCT__ 3842 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3843 /*@ 3844 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3845 (the default parallel PETSc format). 3846 3847 Collective on MPI_Comm 3848 3849 Input Parameters: 3850 + B - the matrix 3851 . i - the indices into j for the start of each local row (starts with zero) 3852 . j - the column indices for each local row (starts with zero) 3853 - v - optional values in the matrix 3854 3855 Level: developer 3856 3857 Notes: 3858 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3859 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3860 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3861 3862 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3863 3864 The format used for the sparse matrix input is equivalent to a 3865 row-major ordering, i.e., for the following matrix, the input data expected is 3866 as shown: 3867 3868 1 0 0 3869 2 0 3 P0 3870 ------- 3871 4 5 6 P1 3872 3873 Process0 [P0]: rows_owned=[0,1] 3874 i = {0,1,3} [size = nrow+1 = 2+1] 3875 j = {0,0,2} [size = nz = 3] 3876 v = {1,2,3} [size = nz = 3] 3877 3878 Process1 [P1]: rows_owned=[2] 3879 i = {0,3} [size = nrow+1 = 1+1] 3880 j = {0,1,2} [size = nz = 3] 3881 v = {4,5,6} [size = nz = 3] 3882 3883 .keywords: matrix, aij, compressed row, sparse, parallel 3884 3885 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3886 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3887 @*/ 3888 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3889 { 3890 PetscErrorCode ierr; 3891 3892 PetscFunctionBegin; 3893 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3894 PetscFunctionReturn(0); 3895 } 3896 3897 #undef __FUNCT__ 3898 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3899 /*@C 3900 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3901 (the default parallel PETSc format). For good matrix assembly performance 3902 the user should preallocate the matrix storage by setting the parameters 3903 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3904 performance can be increased by more than a factor of 50. 3905 3906 Collective on MPI_Comm 3907 3908 Input Parameters: 3909 + B - the matrix 3910 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3911 (same value is used for all local rows) 3912 . d_nnz - array containing the number of nonzeros in the various rows of the 3913 DIAGONAL portion of the local submatrix (possibly different for each row) 3914 or NULL, if d_nz is used to specify the nonzero structure. 3915 The size of this array is equal to the number of local rows, i.e., 'm'.
3916 For matrices that will be factored, you must leave room for (and set) 3917 the diagonal entry even if it is zero. 3918 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3919 submatrix (same value is used for all local rows). 3920 - o_nnz - array containing the number of nonzeros in the various rows of the 3921 OFF-DIAGONAL portion of the local submatrix (possibly different for 3922 each row) or NULL, if o_nz is used to specify the nonzero 3923 structure. The size of this array is equal to the number 3924 of local rows, i.e., 'm'. 3925 3926 If the *_nnz parameter is given then the *_nz parameter is ignored. 3927 3928 The AIJ format (also called the Yale sparse matrix format or 3929 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3930 storage. The stored row and column indices begin with zero. 3931 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3932 3933 The parallel matrix is partitioned such that the first m0 rows belong to 3934 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3935 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3936 3937 The DIAGONAL portion of the local submatrix of a processor can be defined 3938 as the submatrix which is obtained by extracting the part corresponding to 3939 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3940 first row that belongs to the processor, r2 is the last row belonging to 3941 this processor, and c1-c2 is the range of indices of the local part of a 3942 vector suitable for applying the matrix to. This is an mxn matrix. In the 3943 common case of a square matrix, the row and column ranges are the same and 3944 the DIAGONAL part is also square. The remaining portion of the local 3945 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3946 3947 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3948 3949 You can call MatGetInfo() to get information on how effective the preallocation was; 3950 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3951 You can also run with the option -info and look for messages with the string 3952 malloc in them to see if additional memory allocation was needed. 3953 3954 Example usage: 3955 3956 Consider the following 8x8 matrix with 34 non-zero values, that is 3957 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3958 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3959 as follows: 3960 3961 .vb 3962 1 2 0 | 0 3 0 | 0 4 3963 Proc0 0 5 6 | 7 0 0 | 8 0 3964 9 0 10 | 11 0 0 | 12 0 3965 ------------------------------------- 3966 13 0 14 | 15 16 17 | 0 0 3967 Proc1 0 18 0 | 19 20 21 | 0 0 3968 0 0 0 | 22 23 0 | 24 0 3969 ------------------------------------- 3970 Proc2 25 26 27 | 0 0 28 | 29 0 3971 30 0 0 | 31 32 33 | 0 34 3972 .ve 3973 3974 This can be represented as a collection of submatrices as: 3975 3976 .vb 3977 A B C 3978 D E F 3979 G H I 3980 .ve 3981 3982 Where the submatrices A,B,C are owned by proc0, D,E,F are 3983 owned by proc1, G,H,I are owned by proc2. 3984 3985 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3986 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3987 The 'M','N' parameters are 8,8, and have the same values on all procs. 3988 3989 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3990 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices 3991 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3992 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3993 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3994 matrix, and [DF] as another SeqAIJ matrix. 3995 3996 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3997 allocated for every row of the local diagonal submatrix, and o_nz 3998 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3999 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4000 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4001 In this case, the values of d_nz,o_nz are: 4002 .vb 4003 proc0 : d_nz = 2, o_nz = 2 4004 proc1 : d_nz = 3, o_nz = 2 4005 proc2 : d_nz = 1, o_nz = 4 4006 .ve 4007 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4008 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4009 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4010 34 values. 4011 4012 When the d_nnz, o_nnz parameters are specified, the storage is specified 4013 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4014 In the above case the values for d_nnz,o_nnz are: 4015 .vb 4016 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4017 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4018 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4019 .ve 4020 Here the space allocated is the sum of all the above values, i.e., 34, and 4021 hence the preallocation is perfect. 4022 4023 Level: intermediate 4024 4025 .keywords: matrix, aij, compressed row, sparse, parallel 4026 4027 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4028 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4029 @*/ 4030 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4031 { 4032 PetscErrorCode ierr; 4033 4034 PetscFunctionBegin; 4035 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4036 PetscValidType(B,1); 4037 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4038 PetscFunctionReturn(0); 4039 } 4040 4041 #undef __FUNCT__ 4042 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4043 /*@ 4044 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 4045 in standard CSR format. 4046 4047 Collective on MPI_Comm 4048 4049 Input Parameters: 4050 + comm - MPI communicator 4051 . m - number of local rows (Cannot be PETSC_DECIDE) 4052 . n - This value should be the same as the local size used in creating the 4053 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4054 calculated if N is given) For square matrices n is almost always m. 4055 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4056 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4057 . i - row indices 4058 . j - column indices 4059 - a - matrix values 4060 4061 Output Parameter: 4062 . mat - the matrix 4063 4064 Level: intermediate 4065 4066 Notes: 4067 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4068 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4069 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
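   As an illustration only (a minimal sketch, not part of this source; the communicator and the
   names m, n, i, j, a simply mirror the documented parameters and the per-process example below),
   each process passes the CSR arrays describing its own rows:

.vb
     Mat A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve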
4070 4071 The i and j indices are 0 based, and the i indices are offsets into the local j array. 4072 4073 The format used for the sparse matrix input is equivalent to a 4074 row-major ordering, i.e., for the following matrix, the input data expected is 4075 as shown: 4076 4077 1 0 0 4078 2 0 3 P0 4079 ------- 4080 4 5 6 P1 4081 4082 Process0 [P0]: rows_owned=[0,1] 4083 i = {0,1,3} [size = nrow+1 = 2+1] 4084 j = {0,0,2} [size = nz = 3] 4085 v = {1,2,3} [size = nz = 3] 4086 4087 Process1 [P1]: rows_owned=[2] 4088 i = {0,3} [size = nrow+1 = 1+1] 4089 j = {0,1,2} [size = nz = 3] 4090 v = {4,5,6} [size = nz = 3] 4091 4092 .keywords: matrix, aij, compressed row, sparse, parallel 4093 4094 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4095 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4096 @*/ 4097 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4098 { 4099 PetscErrorCode ierr; 4100 4101 PetscFunctionBegin; 4102 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4103 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4104 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4105 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4106 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4107 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4108 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4109 PetscFunctionReturn(0); 4110 } 4111 4112 #undef __FUNCT__ 4113 #define __FUNCT__ "MatCreateAIJ" 4114 /*@C 4115 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4116 (the default parallel PETSc format). For good matrix assembly performance 4117 the user should preallocate the matrix storage by setting the parameters 4118 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4119 performance can be increased by more than a factor of 50. 4120 4121 Collective on MPI_Comm 4122 4123 Input Parameters: 4124 + comm - MPI communicator 4125 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4126 This value should be the same as the local size used in creating the 4127 y vector for the matrix-vector product y = Ax. 4128 . n - This value should be the same as the local size used in creating the 4129 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4130 calculated if N is given) For square matrices n is almost always m. 4131 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4132 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4133 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4134 (same value is used for all local rows) 4135 . d_nnz - array containing the number of nonzeros in the various rows of the 4136 DIAGONAL portion of the local submatrix (possibly different for each row) 4137 or NULL, if d_nz is used to specify the nonzero structure. 4138 The size of this array is equal to the number of local rows, i.e., 'm'. 4139 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4140 submatrix (same value is used for all local rows).
4141 - o_nnz - array containing the number of nonzeros in the various rows of the 4142 OFF-DIAGONAL portion of the local submatrix (possibly different for 4143 each row) or NULL, if o_nz is used to specify the nonzero 4144 structure. The size of this array is equal to the number 4145 of local rows, i.e., 'm'. 4146 4147 Output Parameter: 4148 . A - the matrix 4149 4150 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4151 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4152 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4153 4154 Notes: 4155 If the *_nnz parameter is given then the *_nz parameter is ignored. 4156 4157 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4158 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4159 storage requirements for this matrix. 4160 4161 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4162 processor then it must be used on all processors that share the object for 4163 that argument. 4164 4165 The user MUST specify either the local or global matrix dimensions 4166 (possibly both). 4167 4168 The parallel matrix is partitioned across processors such that the 4169 first m0 rows belong to process 0, the next m1 rows belong to 4170 process 1, the next m2 rows belong to process 2, etc., where 4171 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores 4172 values corresponding to an [m x N] submatrix. 4173 4174 The columns are logically partitioned with the n0 columns belonging 4175 to the 0th partition, the next n1 columns belonging to the next 4176 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4177 4178 The DIAGONAL portion of the local submatrix on any given processor 4179 is the submatrix corresponding to the rows and columns m,n 4180 corresponding to the given processor, i.e., the diagonal matrix on 4181 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4182 etc. The remaining portion of the local submatrix [m x (N-n)] 4183 constitutes the OFF-DIAGONAL portion. The example below better 4184 illustrates this concept. 4185 4186 For a square global matrix we define each processor's diagonal portion 4187 to be its local rows and the corresponding columns (a square submatrix); 4188 each processor's off-diagonal portion encompasses the remainder of the 4189 local matrix (a rectangular submatrix). 4190 4191 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4192 4193 When calling this routine with a single process communicator, a matrix of 4194 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4195 type of communicator, use the construction mechanism: 4196 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4197 4198 By default, this format uses inodes (identical nodes) when possible. 4199 We search for consecutive rows with the same nonzero structure, thereby 4200 reusing matrix information to achieve increased efficiency. 4201 4202 Options Database Keys: 4203 + -mat_no_inode - Do not use inodes 4204 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4205 - -mat_aij_oneindex - Internally use indexing starting at 1 4206 rather than 0. Note that when calling MatSetValues(), 4207 the user still MUST index entries starting at 0! 4208 4209 4210 Example usage: 4211 4212 Consider the following 8x8 matrix with 34 non-zero values, that is 4213 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 4214 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4215 as follows: 4216 4217 .vb 4218 1 2 0 | 0 3 0 | 0 4 4219 Proc0 0 5 6 | 7 0 0 | 8 0 4220 9 0 10 | 11 0 0 | 12 0 4221 ------------------------------------- 4222 13 0 14 | 15 16 17 | 0 0 4223 Proc1 0 18 0 | 19 20 21 | 0 0 4224 0 0 0 | 22 23 0 | 24 0 4225 ------------------------------------- 4226 Proc2 25 26 27 | 0 0 28 | 29 0 4227 30 0 0 | 31 32 33 | 0 34 4228 .ve 4229 4230 This can be represented as a collection of submatrices as: 4231 4232 .vb 4233 A B C 4234 D E F 4235 G H I 4236 .ve 4237 4238 Where the submatrices A,B,C are owned by proc0, D,E,F are 4239 owned by proc1, G,H,I are owned by proc2. 4240 4241 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4242 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4243 The 'M','N' parameters are 8,8, and have the same values on all procs. 4244 4245 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4246 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4247 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4248 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4249 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4250 matrix, and [DF] as another SeqAIJ matrix. 4251 4252 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4253 allocated for every row of the local diagonal submatrix, and o_nz 4254 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4255 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4256 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4257 In this case, the values of d_nz,o_nz are: 4258 .vb 4259 proc0 : d_nz = 2, o_nz = 2 4260 proc1 : d_nz = 3, o_nz = 2 4261 proc2 : d_nz = 1, o_nz = 4 4262 .ve 4263 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4264 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4265 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4266 34 values. 4267 4268 When the d_nnz, o_nnz parameters are specified, the storage is specified 4269 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4270 In the above case the values for d_nnz,o_nnz are: 4271 .vb 4272 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4273 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4274 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4275 .ve 4276 Here the space allocated is the sum of all the above values, i.e., 34, and 4277 hence the preallocation is perfect.
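   For instance, a sketch (not taken from this source; the variable names are illustrative) of the
   call proc0 would make with the per-row counts above, each process passing its own m, n, d_nnz
   and o_nnz:

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve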
4278 4279 Level: intermediate 4280 4281 .keywords: matrix, aij, compressed row, sparse, parallel 4282 4283 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4284 MPIAIJ, MatCreateMPIAIJWithArrays() 4285 @*/ 4286 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4287 { 4288 PetscErrorCode ierr; 4289 PetscMPIInt size; 4290 4291 PetscFunctionBegin; 4292 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4293 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4294 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4295 if (size > 1) { 4296 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4297 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4298 } else { 4299 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4300 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4301 } 4302 PetscFunctionReturn(0); 4303 } 4304 4305 #undef __FUNCT__ 4306 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4307 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4308 { 4309 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4310 4311 PetscFunctionBegin; 4312 *Ad = a->A; 4313 *Ao = a->B; 4314 *colmap = a->garray; 4315 PetscFunctionReturn(0); 4316 } 4317 4318 #undef __FUNCT__ 4319 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4320 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4321 { 4322 PetscErrorCode ierr; 4323 PetscInt i; 4324 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4325 4326 PetscFunctionBegin; 4327 if (coloring->ctype == IS_COLORING_GLOBAL) { 4328 ISColoringValue *allcolors,*colors; 4329 ISColoring ocoloring; 4330 4331 /* set coloring for diagonal portion */ 4332 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4333 4334 /* set coloring for off-diagonal portion */ 4335 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4336 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4337 for (i=0; i<a->B->cmap->n; i++) { 4338 colors[i] = allcolors[a->garray[i]]; 4339 } 4340 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4341 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4342 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4343 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4344 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4345 ISColoringValue *colors; 4346 PetscInt *larray; 4347 ISColoring ocoloring; 4348 4349 /* set coloring for diagonal portion */ 4350 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4351 for (i=0; i<a->A->cmap->n; i++) { 4352 larray[i] = i + A->cmap->rstart; 4353 } 4354 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4355 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4356 for (i=0; i<a->A->cmap->n; i++) { 4357 colors[i] = coloring->colors[larray[i]]; 4358 } 4359 ierr = PetscFree(larray);CHKERRQ(ierr); 4360 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4361 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4362 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4363 4364 /* set coloring for off-diagonal portion */ 4365 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4366 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4367 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4368 for (i=0; i<a->B->cmap->n; i++) { 4369 colors[i] = coloring->colors[larray[i]]; 4370 } 4371 ierr = PetscFree(larray);CHKERRQ(ierr); 4372 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4373 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4374 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4375 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4376 PetscFunctionReturn(0); 4377 } 4378 4379 #undef __FUNCT__ 4380 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4381 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4382 { 4383 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4384 PetscErrorCode ierr; 4385 4386 PetscFunctionBegin; 4387 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4388 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4389 PetscFunctionReturn(0); 4390 } 4391 4392 #undef __FUNCT__ 4393 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4394 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4395 { 4396 PetscErrorCode ierr; 4397 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4398 PetscInt *indx; 4399 4400 PetscFunctionBegin; 4401 /* This routine will ONLY return MPIAIJ type matrix */ 4402 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4403 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4404 if (n == PETSC_DECIDE) { 4405 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4406 } 4407 /* Check sum(n) = N */ 4408 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4409 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4410 4411 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4412 rstart -= m; 4413 4414 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4415 for (i=0; i<m; i++) { 4416 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4417 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4418 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4419 } 4420 4421 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4422 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4423 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4424 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4425 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4426 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4427 PetscFunctionReturn(0); 4428 } 4429 4430 #undef __FUNCT__ 4431 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4432 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4433 { 4434 PetscErrorCode ierr; 4435 PetscInt m,N,i,rstart,nnz,Ii; 4436 PetscInt *indx; 4437 PetscScalar *values; 4438 4439 PetscFunctionBegin; 4440 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4441 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4442 for (i=0; i<m; i++) { 4443 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4444 Ii = i + rstart; 4445 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4446 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4447 } 4448 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4449 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4450 PetscFunctionReturn(0); 4451 } 4452 4453 #undef __FUNCT__ 4454 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4455 /*@ 4456 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4457 matrices from each processor 4458 4459 Collective on MPI_Comm 4460 4461 Input Parameters: 4462 + comm - the communicator the parallel matrix will live on 4463 . inmat - the input sequential matrix (one per process) 4464 . n - number of local columns (or PETSC_DECIDE) 4465 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4466 4467 Output Parameter: 4468 . outmat - the parallel matrix generated 4469 4470 Level: advanced 4471 4472 Notes: The number of columns of the matrix on EACH processor MUST be the same. 4473 4474 @*/ 4475 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4476 { 4477 PetscErrorCode ierr; 4478 PetscMPIInt size; 4479 4480 PetscFunctionBegin; 4481 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4482 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4483 if (size == 1) { 4484 if (scall == MAT_INITIAL_MATRIX) { 4485 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4486 } else { 4487 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4488 } 4489 } else { 4490 if (scall == MAT_INITIAL_MATRIX) { 4491 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4492 } 4493 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4494 } 4495 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4496 PetscFunctionReturn(0); 4497 } 4498 4499 #undef __FUNCT__ 4500 #define __FUNCT__ "MatFileSplit" 4501 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4502 { 4503 PetscErrorCode ierr; 4504 PetscMPIInt rank; 4505 PetscInt m,N,i,rstart,nnz; 4506 size_t len; 4507 const PetscInt *indx; 4508 PetscViewer out; 4509 char *name; 4510 Mat B; 4511 const PetscScalar *values; 4512 4513 PetscFunctionBegin; 4514 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4515 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4516 /* Should this be the type of the diagonal block of A? 
*/ 4517 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4518 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4519 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4520 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4521 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4522 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4523 for (i=0; i<m; i++) { 4524 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4525 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4526 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4527 } 4528 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4529 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4530 4531 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4532 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4533 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4534 sprintf(name,"%s.%d",outfile,rank); 4535 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4536 ierr = PetscFree(name);CHKERRQ(ierr); 4537 ierr = MatView(B,out);CHKERRQ(ierr); 4538 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4539 ierr = MatDestroy(&B);CHKERRQ(ierr); 4540 PetscFunctionReturn(0); 4541 } 4542 4543 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4544 #undef __FUNCT__ 4545 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4546 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4547 { 4548 PetscErrorCode ierr; 4549 Mat_Merge_SeqsToMPI *merge; 4550 PetscContainer container; 4551 4552 PetscFunctionBegin; 4553 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4554 if (container) { 4555 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4557 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4558 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4559 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4560 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4561 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4562 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4563 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4564 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4565 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4566 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4567 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4568 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4569 ierr = PetscFree(merge);CHKERRQ(ierr); 4570 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4571 } 4572 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4573 PetscFunctionReturn(0); 4574 } 4575 4576 #include <../src/mat/utils/freespace.h> 4577 #include <petscbt.h> 4578 4579 #undef __FUNCT__ 4580 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4581 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4582 { 4583 PetscErrorCode ierr; 4584 MPI_Comm comm; 4585 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4586 PetscMPIInt size,rank,taga,*len_s; 4587 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4588 PetscInt proc,m; 4589 PetscInt **buf_ri,**buf_rj; 4590 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4591 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4592 MPI_Request *s_waits,*r_waits; 4593 MPI_Status *status; 4594 MatScalar *aa=a->a; 4595 MatScalar **abuf_r,*ba_i; 4596 Mat_Merge_SeqsToMPI *merge; 4597 PetscContainer container; 4598 4599 PetscFunctionBegin; 4600 ierr = 
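/*
     Numeric phase of MatCreateMPIAIJSumSeqAIJ(): retrieve the Mat_Merge_SeqsToMPI context that the
     symbolic phase attached to mpimat, exchange the values of the contributed seqmat rows between
     processes, and accumulate the local plus received values into mpimat with MatSetValues().
     A typical driver call is the MatCreateMPIAIJSumSeqAIJ() wrapper defined later in this file,
     e.g. (a sketch; C is an illustrative name):

       ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
  */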
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4601 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4602 4603 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4604 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4605 4606 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4607 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4608 4609 bi = merge->bi; 4610 bj = merge->bj; 4611 buf_ri = merge->buf_ri; 4612 buf_rj = merge->buf_rj; 4613 4614 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4615 owners = merge->rowmap->range; 4616 len_s = merge->len_s; 4617 4618 /* send and recv matrix values */ 4619 /*-----------------------------*/ 4620 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4621 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4622 4623 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4624 for (proc=0,k=0; proc<size; proc++) { 4625 if (!len_s[proc]) continue; 4626 i = owners[proc]; 4627 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4628 k++; 4629 } 4630 4631 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4632 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4633 ierr = PetscFree(status);CHKERRQ(ierr); 4634 4635 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4636 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4637 4638 /* insert mat values of mpimat */ 4639 /*----------------------------*/ 4640 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4641 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4642 4643 for (k=0; k<merge->nrecv; k++) { 4644 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4645 nrows = *(buf_ri_k[k]); 4646 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4647 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4648 } 4649 4650 /* set values of ba */ 4651 m = merge->rowmap->n; 4652 for (i=0; i<m; i++) { 4653 arow = owners[rank] + i; 4654 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4655 bnzi = bi[i+1] - bi[i]; 4656 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4657 4658 /* add local non-zero vals of this proc's seqmat into ba */ 4659 anzi = ai[arow+1] - ai[arow]; 4660 aj = a->j + ai[arow]; 4661 aa = a->a + ai[arow]; 4662 nextaj = 0; 4663 for (j=0; nextaj<anzi; j++) { 4664 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4665 ba_i[j] += aa[nextaj++]; 4666 } 4667 } 4668 4669 /* add received vals into ba */ 4670 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4671 /* i-th row */ 4672 if (i == *nextrow[k]) { 4673 anzi = *(nextai[k]+1) - *nextai[k]; 4674 aj = buf_rj[k] + *(nextai[k]); 4675 aa = abuf_r[k] + *(nextai[k]); 4676 nextaj = 0; 4677 for (j=0; nextaj<anzi; j++) { 4678 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4679 ba_i[j] += aa[nextaj++]; 4680 } 4681 } 4682 nextrow[k]++; nextai[k]++; 4683 } 4684 } 4685 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4686 } 4687 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4688 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4689 4690 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4691 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4692 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4693 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4694 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4695 PetscFunctionReturn(0); 4696 } 4697 4698 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4699 4700 #undef __FUNCT__ 4701 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4702 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4703 { 4704 PetscErrorCode ierr; 4705 Mat B_mpi; 4706 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4707 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4708 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4709 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4710 PetscInt len,proc,*dnz,*onz,bs,cbs; 4711 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4712 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4713 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4714 MPI_Status *status; 4715 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4716 PetscBT lnkbt; 4717 Mat_Merge_SeqsToMPI *merge; 4718 PetscContainer container; 4719 4720 PetscFunctionBegin; 4721 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4722 4723 /* make sure it is a PETSc comm */ 4724 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4725 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4726 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4727 4728 ierr = PetscNew(&merge);CHKERRQ(ierr); 4729 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4730 4731 /* determine row ownership */ 4732 /*---------------------------------------------------------*/ 4733 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4734 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4735 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4736 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4737 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4738 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4739 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4740 4741 m = merge->rowmap->n; 4742 owners = merge->rowmap->range; 4743 4744 /* determine the number of messages to send, their lengths */ 4745 /*---------------------------------------------------------*/ 4746 len_s = merge->len_s; 4747 4748 len = 0; /* length of buf_si[] */ 4749 merge->nsend = 0; 4750 for (proc=0; proc<size; proc++) { 4751 len_si[proc] = 0; 4752 if (proc == rank) { 4753 len_s[proc] = 0; 4754 } else { 4755 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4756 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4757 } 4758 if (len_s[proc]) { 4759 merge->nsend++; 4760 nrows = 0; 4761 for (i=owners[proc]; i<owners[proc+1]; i++) { 4762 if (ai[i+1] > ai[i]) nrows++; 4763 } 4764 len_si[proc] = 2*(nrows+1); 4765 len += len_si[proc]; 4766 } 4767 } 4768 4769 /* determine the number and length of messages to receive for ij-structure */ 4770 /*-------------------------------------------------------------------------*/ 4771 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4772 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4773 4774 /* post the Irecv of j-structure */ 4775 /*-------------------------------*/ 4776 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4777 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4778 4779 /* post the Isend of j-structure */ 4780 /*--------------------------------*/ 4781 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4782 4783 for (proc=0, k=0; proc<size; proc++) { 4784 if (!len_s[proc]) continue; 4785 i = owners[proc]; 4786 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4787 k++; 4788 } 4789 4790 /* receives and sends of j-structure are complete */ 4791 /*------------------------------------------------*/ 4792 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4793 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4794 4795 /* send and recv i-structure */ 4796 /*---------------------------*/ 4797 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4798 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4799 4800 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4801 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4802 for (proc=0,k=0; proc<size; proc++) { 4803 if (!len_s[proc]) continue; 4804 /* form outgoing message for i-structure: 4805 buf_si[0]: nrows to be sent 4806 [1:nrows]: row index (global) 4807 [nrows+1:2*nrows+1]: i-structure index 4808 */ 4809 /*-------------------------------------------*/ 4810 nrows = len_si[proc]/2 - 1; 4811 buf_si_i = buf_si + nrows+1; 4812 buf_si[0] = nrows; 4813 buf_si_i[0] = 0; 4814 nrows = 0; 4815 for (i=owners[proc]; i<owners[proc+1]; i++) { 4816 anzi = ai[i+1] - ai[i]; 4817 if (anzi) { 4818 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4819 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4820 nrows++; 4821 } 4822 } 4823 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4824 k++; 4825 buf_si += len_si[proc]; 4826 } 4827 4828 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4829 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4830 4831 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4832 for (i=0; i<merge->nrecv; i++) { 4833 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4834 } 4835 4836 ierr = PetscFree(len_si);CHKERRQ(ierr); 4837 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4838 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4839 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4840 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4841 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4842 ierr = PetscFree(status);CHKERRQ(ierr); 4843 4844 /* compute a local seq matrix in each processor */ 4845 /*----------------------------------------------*/ 4846 /* allocate bi array and free space for accumulating nonzero column info */ 4847 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4848 bi[0] = 0; 4849 4850 /* create and initialize a linked list */ 4851 nlnk = N+1; 4852 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4853 4854 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4855 len = ai[owners[rank+1]] - ai[owners[rank]]; 4856 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4857 4858 current_space = free_space; 4859 4860 /* determine symbolic info for each local row */ 4861 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4862 4863 for (k=0; k<merge->nrecv; k++) { 4864 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4865 nrows = *buf_ri_k[k]; 4866 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4867 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4868 } 4869 4870 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4871 len = 0; 4872 for (i=0; i<m; i++) { 4873 bnzi = 0; 4874 /* add local non-zero cols of this proc's seqmat into lnk */ 4875 arow = owners[rank] + i; 4876 anzi = ai[arow+1] - ai[arow]; 4877 aj = a->j + ai[arow]; 4878 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4879 bnzi += nlnk; 4880 /* add received col data into lnk */ 4881 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4882 if (i == *nextrow[k]) { /* i-th row */ 4883 anzi = *(nextai[k]+1) - *nextai[k]; 4884 aj = buf_rj[k] + *nextai[k]; 4885 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4886 bnzi += nlnk; 4887 nextrow[k]++; nextai[k]++; 4888 } 4889 } 4890 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4891 4892 /* if free space is not available, make more free space */ 4893 if (current_space->local_remaining<bnzi) { 4894 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4895 nspacedouble++; 4896 } 4897 /* copy data into free space, then initialize lnk */ 4898 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4899 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4900 4901 current_space->array += bnzi; 4902 current_space->local_used += bnzi; 4903 current_space->local_remaining -= bnzi; 4904 4905 bi[i+1] = bi[i] + bnzi; 4906 } 4907 4908 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4909 4910 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4911 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4912 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4913 4914 /* create symbolic parallel matrix B_mpi */ 4915 /*---------------------------------------*/ 4916 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4917 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4918 if (n==PETSC_DECIDE) { 4919 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4920 } else { 4921 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4922 } 4923 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4924 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4925 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4926 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4927 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4928 4929 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4930 B_mpi->assembled = PETSC_FALSE; 4931 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4932 merge->bi = bi; 4933 merge->bj = bj; 4934 merge->buf_ri = buf_ri; 4935 merge->buf_rj = buf_rj; 4936 merge->coi = NULL; 4937 merge->coj = NULL; 4938 merge->owners_co = NULL; 4939 4940 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4941 4942 /* attach the supporting struct to B_mpi for reuse */ 4943 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4944 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4945 ierr = 
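/* composing the container under the name "MatMergeSeqsToMPI" lets MatCreateMPIAIJSumSeqAIJNumeric()
     and MatDestroy_MPIAIJ_SeqsToMPI() recover the merge context later with PetscObjectQuery() */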
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4946 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4947 *mpimat = B_mpi; 4948 4949 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4950 PetscFunctionReturn(0); 4951 } 4952 4953 #undef __FUNCT__ 4954 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4955 /*@C 4956 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential 4957 matrices from each processor 4958 4959 Collective on MPI_Comm 4960 4961 Input Parameters: 4962 + comm - the communicator the parallel matrix will live on 4963 . seqmat - the input sequential matrix (one per process) 4964 . m - number of local rows (or PETSC_DECIDE) 4965 . n - number of local columns (or PETSC_DECIDE) 4966 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4967 4968 Output Parameter: 4969 . mpimat - the parallel matrix generated 4970 4971 Level: advanced 4972 4973 Notes: 4974 The dimensions of the sequential matrix on each processor MUST be the same. 4975 The input seqmat is included in the container "Mat_Merge_SeqsToMPI" and will be 4976 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4977 @*/ 4978 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4979 { 4980 PetscErrorCode ierr; 4981 PetscMPIInt size; 4982 4983 PetscFunctionBegin; 4984 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4985 if (size == 1) { 4986 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4987 if (scall == MAT_INITIAL_MATRIX) { 4988 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4989 } else { 4990 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4991 } 4992 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4993 PetscFunctionReturn(0); 4994 } 4995 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4996 if (scall == MAT_INITIAL_MATRIX) { 4997 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4998 } 4999 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5000 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5001 PetscFunctionReturn(0); 5002 } 5003 5004 #undef __FUNCT__ 5005 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5006 /*@ 5007 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5008 mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained 5009 with MatGetSize() 5010 5011 Not Collective 5012 5013 Input Parameters: 5014 + A - the matrix 5015 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5016 5017 Output Parameter: 5018 . 
A_loc - the local sequential matrix generated 5019 5020 Level: developer 5021 5022 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5023 5024 @*/ 5025 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5026 { 5027 PetscErrorCode ierr; 5028 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5029 Mat_SeqAIJ *mat,*a,*b; 5030 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5031 MatScalar *aa,*ba,*cam; 5032 PetscScalar *ca; 5033 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5034 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5035 PetscBool match; 5036 5037 PetscFunctionBegin; 5038 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5039 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5040 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5041 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5042 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5043 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5044 aa = a->a; ba = b->a; 5045 if (scall == MAT_INITIAL_MATRIX) { 5046 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 5047 ci[0] = 0; 5048 for (i=0; i<am; i++) { 5049 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5050 } 5051 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 5052 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 5053 k = 0; 5054 for (i=0; i<am; i++) { 5055 ncols_o = bi[i+1] - bi[i]; 5056 ncols_d = ai[i+1] - ai[i]; 5057 /* off-diagonal portion of A */ 5058 for (jo=0; jo<ncols_o; jo++) { 5059 col = cmap[*bj]; 5060 if (col >= cstart) break; 5061 cj[k] = col; bj++; 5062 ca[k++] = *ba++; 5063 } 5064 /* diagonal portion of A */ 5065 for (j=0; j<ncols_d; j++) { 5066 cj[k] = cstart + *aj++; 5067 ca[k++] = *aa++; 5068 } 5069 /* off-diagonal portion of A */ 5070 for (j=jo; j<ncols_o; j++) { 5071 cj[k] = cmap[*bj++]; 5072 ca[k++] = *ba++; 5073 } 5074 } 5075 /* put together the new matrix */ 5076 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5077 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5078 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5079 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5080 mat->free_a = PETSC_TRUE; 5081 mat->free_ij = PETSC_TRUE; 5082 mat->nonew = 0; 5083 } else if (scall == MAT_REUSE_MATRIX) { 5084 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5085 ci = mat->i; cj = mat->j; cam = mat->a; 5086 for (i=0; i<am; i++) { 5087 /* off-diagonal portion of A */ 5088 ncols_o = bi[i+1] - bi[i]; 5089 for (jo=0; jo<ncols_o; jo++) { 5090 col = cmap[*bj]; 5091 if (col >= cstart) break; 5092 *cam++ = *ba++; bj++; 5093 } 5094 /* diagonal portion of A */ 5095 ncols_d = ai[i+1] - ai[i]; 5096 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5097 /* off-diagonal portion of A */ 5098 for (j=jo; j<ncols_o; j++) { 5099 *cam++ = *ba++; bj++; 5100 } 5101 } 5102 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5103 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5104 PetscFunctionReturn(0); 5105 } 5106 5107 #undef __FUNCT__ 5108 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5109 /*@C 5110 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5111 5112 Not Collective 5113 5114 Input Parameters: 5115 + A - the matrix 5116 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5117 - row, col - index sets of rows and columns to extract (or NULL) 5118 5119 Output Parameter: 5120 . 
A_loc - the local sequential matrix generated 5121 5122 Level: developer 5123 5124 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5125 5126 @*/ 5127 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5128 { 5129 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5130 PetscErrorCode ierr; 5131 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5132 IS isrowa,iscola; 5133 Mat *aloc; 5134 PetscBool match; 5135 5136 PetscFunctionBegin; 5137 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5138 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5139 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5140 if (!row) { 5141 start = A->rmap->rstart; end = A->rmap->rend; 5142 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5143 } else { 5144 isrowa = *row; 5145 } 5146 if (!col) { 5147 start = A->cmap->rstart; 5148 cmap = a->garray; 5149 nzA = a->A->cmap->n; 5150 nzB = a->B->cmap->n; 5151 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5152 ncols = 0; 5153 for (i=0; i<nzB; i++) { 5154 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5155 else break; 5156 } 5157 imark = i; 5158 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5159 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5160 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5161 } else { 5162 iscola = *col; 5163 } 5164 if (scall != MAT_INITIAL_MATRIX) { 5165 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5166 aloc[0] = *A_loc; 5167 } 5168 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5169 *A_loc = aloc[0]; 5170 ierr = PetscFree(aloc);CHKERRQ(ierr); 5171 if (!row) { 5172 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5173 } 5174 if (!col) { 5175 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5176 } 5177 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5178 PetscFunctionReturn(0); 5179 } 5180 5181 #undef __FUNCT__ 5182 #define __FUNCT__ "MatGetBrowsOfAcols" 5183 /*@C 5184 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5185 5186 Collective on Mat 5187 5188 Input Parameters: 5189 + A,B - the matrices in mpiaij format 5190 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5191 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5192 5193 Output Parameter: 5194 + rowb, colb - index sets of rows and columns of B to extract 5195 - B_seq - the sequential matrix generated 5196 5197 Level: developer 5198 5199 @*/ 5200 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5201 { 5202 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5203 PetscErrorCode ierr; 5204 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5205 IS isrowb,iscolb; 5206 Mat *bseq=NULL; 5207 5208 PetscFunctionBegin; 5209 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5210 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5211 } 5212 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5213 5214 if (scall == MAT_INITIAL_MATRIX) { 5215 start = A->cmap->rstart; 5216 cmap = a->garray; 5217 nzA = a->A->cmap->n; 5218 nzB = a->B->cmap->n; 5219 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5220 ncols = 0; 5221 for (i=0; i<nzB; i++) { /* row < local row index */ 5222 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5223 else break; 5224 } 5225 imark = i; 5226 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5227 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5228 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5229 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5230 } else { 5231 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5232 isrowb = *rowb; iscolb = *colb; 5233 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5234 bseq[0] = *B_seq; 5235 } 5236 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5237 *B_seq = bseq[0]; 5238 ierr = PetscFree(bseq);CHKERRQ(ierr); 5239 if (!rowb) { 5240 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5241 } else { 5242 *rowb = isrowb; 5243 } 5244 if (!colb) { 5245 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5246 } else { 5247 *colb = iscolb; 5248 } 5249 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5250 PetscFunctionReturn(0); 5251 } 5252 5253 #undef __FUNCT__ 5254 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5255 /* 5256 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5257 of the OFF-DIAGONAL portion of local A 5258 5259 Collective on Mat 5260 5261 Input Parameters: 5262 + A,B - the matrices in mpiaij format 5263 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5264 5265 Output Parameter: 5266 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5267 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5268 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5269 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5270 5271 Level: developer 5272 5273 */ 5274 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5275 { 5276 VecScatter_MPI_General *gen_to,*gen_from; 5277 PetscErrorCode ierr; 5278 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5279 Mat_SeqAIJ *b_oth; 5280 VecScatter ctx =a->Mvctx; 5281 MPI_Comm comm; 5282 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5283 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5284 PetscScalar *rvalues,*svalues; 5285 MatScalar *b_otha,*bufa,*bufA; 5286 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5287 MPI_Request *rwaits = NULL,*swaits = NULL; 5288 MPI_Status *sstatus,rstatus; 5289 PetscMPIInt jj; 5290 PetscInt *cols,sbs,rbs; 5291 PetscScalar *vals; 5292 5293 PetscFunctionBegin; 5294 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5295 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5296 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5297 } 5298 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5299 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5300 5301 gen_to = (VecScatter_MPI_General*)ctx->todata; 5302 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5303 rvalues = gen_from->values; /* holds the length of receiving row */ 5304 svalues = gen_to->values; /* holds the length of sending row */ 5305 nrecvs = gen_from->n; 5306 nsends = gen_to->n; 5307 5308 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5309 srow = gen_to->indices; /* local row index to be sent */ 5310 sstarts = gen_to->starts; 5311 sprocs = gen_to->procs; 5312 sstatus = gen_to->sstatus; 5313 sbs = gen_to->bs; 5314 rstarts = gen_from->starts; 5315 rprocs = gen_from->procs; 5316 rbs = gen_from->bs; 5317 5318 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5319 if (scall == MAT_INITIAL_MATRIX) { 5320 /* i-array */ 5321 /*---------*/ 5322 /* post receives */ 5323 for (i=0; i<nrecvs; i++) { 5324 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5325 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5326 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5327 } 5328 5329 /* pack the outgoing message */ 5330 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5331 5332 sstartsj[0] = 0; 5333 rstartsj[0] = 0; 5334 len = 0; /* total length of j or a array to be sent */ 5335 k = 0; 5336 for (i=0; i<nsends; i++) { 5337 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5338 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5339 for (j=0; j<nrows; j++) { 5340 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5341 for (l=0; l<sbs; l++) { 5342 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5343 5344 rowlen[j*sbs+l] = ncols; 5345 5346 len += ncols; 5347 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5348 } 5349 k++; 5350 } 5351 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5352 5353 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5354 } 5355 /* recvs and 
sends of i-array are completed */ 5356 i = nrecvs; 5357 while (i--) { 5358 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5359 } 5360 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5361 5362 /* allocate buffers for sending j and a arrays */ 5363 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5364 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5365 5366 /* create i-array of B_oth */ 5367 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5368 5369 b_othi[0] = 0; 5370 len = 0; /* total length of j or a array to be received */ 5371 k = 0; 5372 for (i=0; i<nrecvs; i++) { 5373 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5374 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5375 for (j=0; j<nrows; j++) { 5376 b_othi[k+1] = b_othi[k] + rowlen[j]; 5377 len += rowlen[j]; k++; 5378 } 5379 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5380 } 5381 5382 /* allocate space for j and a arrrays of B_oth */ 5383 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5384 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5385 5386 /* j-array */ 5387 /*---------*/ 5388 /* post receives of j-array */ 5389 for (i=0; i<nrecvs; i++) { 5390 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5391 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5392 } 5393 5394 /* pack the outgoing message j-array */ 5395 k = 0; 5396 for (i=0; i<nsends; i++) { 5397 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5398 bufJ = bufj+sstartsj[i]; 5399 for (j=0; j<nrows; j++) { 5400 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5401 for (ll=0; ll<sbs; ll++) { 5402 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5403 for (l=0; l<ncols; l++) { 5404 *bufJ++ = cols[l]; 5405 } 5406 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5407 } 5408 } 5409 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5410 } 5411 5412 /* recvs and sends of j-array are completed */ 5413 i = nrecvs; 5414 while (i--) { 5415 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5416 } 5417 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5418 } else if (scall == MAT_REUSE_MATRIX) { 5419 sstartsj = *startsj_s; 5420 rstartsj = *startsj_r; 5421 bufa = *bufa_ptr; 5422 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5423 b_otha = b_oth->a; 5424 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5425 5426 /* a-array */ 5427 /*---------*/ 5428 /* post receives of a-array */ 5429 for (i=0; i<nrecvs; i++) { 5430 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5431 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5432 } 5433 5434 /* pack the outgoing message a-array */ 5435 k = 0; 5436 for (i=0; i<nsends; i++) { 5437 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5438 bufA = bufa+sstartsj[i]; 5439 for (j=0; j<nrows; j++) { 5440 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5441 for (ll=0; ll<sbs; ll++) { 5442 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5443 for (l=0; l<ncols; l++) { 5444 *bufA++ = vals[l]; 5445 } 5446 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5447 } 5448 } 5449 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5450 } 5451 /* recvs and sends of a-array are completed */ 5452 i = nrecvs; 5453 while (i--) { 5454 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5455 } 5456 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5457 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5458 5459 if (scall == MAT_INITIAL_MATRIX) { 5460 /* put together the new matrix */ 5461 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5462 5463 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5464 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5465 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5466 b_oth->free_a = PETSC_TRUE; 5467 b_oth->free_ij = PETSC_TRUE; 5468 b_oth->nonew = 0; 5469 5470 ierr = PetscFree(bufj);CHKERRQ(ierr); 5471 if (!startsj_s || !bufa_ptr) { 5472 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5473 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5474 } else { 5475 *startsj_s = sstartsj; 5476 *startsj_r = rstartsj; 5477 *bufa_ptr = bufa; 5478 } 5479 } 5480 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5481 PetscFunctionReturn(0); 5482 } 5483 5484 #undef __FUNCT__ 5485 #define __FUNCT__ "MatGetCommunicationStructs" 5486 /*@C 5487 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5488 5489 Not Collective 5490 5491 Input Parameters: 5492 . A - The matrix in mpiaij format 5493 5494 Output Parameter: 5495 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5496 . colmap - A map from global column index to local index into lvec 5497 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5498 5499 Level: developer 5500 5501 @*/ 5502 #if defined(PETSC_USE_CTABLE) 5503 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5504 #else 5505 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5506 #endif 5507 { 5508 Mat_MPIAIJ *a; 5509 5510 PetscFunctionBegin; 5511 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5512 PetscValidPointer(lvec, 2); 5513 PetscValidPointer(colmap, 3); 5514 PetscValidPointer(multScatter, 4); 5515 a = (Mat_MPIAIJ*) A->data; 5516 if (lvec) *lvec = a->lvec; 5517 if (colmap) *colmap = a->colmap; 5518 if (multScatter) *multScatter = a->Mvctx; 5519 PetscFunctionReturn(0); 5520 } 5521 5522 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5523 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5524 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5525 5526 #undef __FUNCT__ 5527 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5528 /* 5529 Computes (B'*A')' since computing B*A directly is untenable 5530 5531 n p p 5532 ( ) ( ) ( ) 5533 m ( A ) * n ( B ) = m ( C ) 5534 ( ) ( ) ( ) 5535 5536 */ 5537 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5538 { 5539 PetscErrorCode ierr; 5540 Mat At,Bt,Ct; 5541 5542 PetscFunctionBegin; 5543 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5544 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5545 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5546 ierr = MatDestroy(&At);CHKERRQ(ierr); 
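/*
     This kernel is normally reached through the generic MatMatMult() interface when the first operand
     is MATMPIDENSE and the second is MATMPIAIJ, e.g. (a minimal sketch; A_dense, B_aij and C are
     illustrative names for already assembled matrices with compatible sizes):

       Mat C;
       ierr = MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  */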
5547 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5548 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5549 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5550 PetscFunctionReturn(0); 5551 } 5552 5553 #undef __FUNCT__ 5554 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5555 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5556 { 5557 PetscErrorCode ierr; 5558 PetscInt m=A->rmap->n,n=B->cmap->n; 5559 Mat Cmat; 5560 5561 PetscFunctionBegin; 5562 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5563 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5564 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5565 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5566 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5567 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5568 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5569 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5570 5571 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5572 5573 *C = Cmat; 5574 PetscFunctionReturn(0); 5575 } 5576 5577 /* ----------------------------------------------------------------*/ 5578 #undef __FUNCT__ 5579 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5580 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5581 { 5582 PetscErrorCode ierr; 5583 5584 PetscFunctionBegin; 5585 if (scall == MAT_INITIAL_MATRIX) { 5586 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5587 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5588 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5589 } 5590 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5591 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5592 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5593 PetscFunctionReturn(0); 5594 } 5595 5596 #if defined(PETSC_HAVE_MUMPS) 5597 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5598 #endif 5599 #if defined(PETSC_HAVE_PASTIX) 5600 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5601 #endif 5602 #if defined(PETSC_HAVE_SUPERLU_DIST) 5603 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5604 #endif 5605 #if defined(PETSC_HAVE_CLIQUE) 5606 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5607 #endif 5608 5609 /*MC 5610 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5611 5612 Options Database Keys: 5613 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5614 5615 Level: beginner 5616 5617 .seealso: MatCreateAIJ() 5618 M*/ 5619 5620 #undef __FUNCT__ 5621 #define __FUNCT__ "MatCreate_MPIAIJ" 5622 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5623 { 5624 Mat_MPIAIJ *b; 5625 PetscErrorCode ierr; 5626 PetscMPIInt size; 5627 5628 PetscFunctionBegin; 5629 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5630 5631 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5632 B->data = (void*)b; 5633 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5634 B->assembled = PETSC_FALSE; 5635 B->insertmode = NOT_SET_VALUES; 5636 b->size = size; 5637 5638 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5639 5640 /* build cache for off array entries formed */ 5641 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5642 5643 b->donotstash = PETSC_FALSE; 5644 b->colmap = 0; 5645 b->garray = 0; 5646 b->roworiented = PETSC_TRUE; 5647 5648 /* stuff used for matrix vector multiply */ 5649 b->lvec = NULL; 5650 b->Mvctx = NULL; 5651 5652 /* stuff for MatGetRow() */ 5653 b->rowindices = 0; 5654 b->rowvalues = 0; 5655 b->getrowactive = PETSC_FALSE; 5656 5657 /* flexible pointer used in CUSP/CUSPARSE classes */ 5658 b->spptr = NULL; 5659 5660 #if defined(PETSC_HAVE_MUMPS) 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5662 #endif 5663 #if defined(PETSC_HAVE_PASTIX) 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5665 #endif 5666 #if defined(PETSC_HAVE_SUPERLU_DIST) 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5668 #endif 5669 #if defined(PETSC_HAVE_CLIQUE) 5670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5671 #endif 5672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5683 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5685 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5686 PetscFunctionReturn(0); 5687 } 5688 5689 #undef __FUNCT__ 5690 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5691 /*@ 5692 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5693 and "off-diagonal" part of the matrix in CSR format. 5694 5695 Collective on MPI_Comm 5696 5697 Input Parameters: 5698 + comm - MPI communicator 5699 . m - number of local rows (Cannot be PETSC_DECIDE) 5700 . n - This value should be the same as the local size used in creating the 5701 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5702 calculated if N is given) For square matrices n is almost always m. 5703 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5704 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5705 . i - row indices for "diagonal" portion of matrix 5706 . j - column indices 5707 . a - matrix values 5708 . oi - row indices for "off-diagonal" portion of matrix 5709 . oj - column indices 5710 - oa - matrix values 5711 5712 Output Parameter: 5713 . mat - the matrix 5714 5715 Level: advanced 5716 5717 Notes: 5718 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5719 must free the arrays once the matrix has been destroyed and not before. 5720 5721 The i and j indices are 0 based 5722 5723 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5724 5725 This sets local rows and cannot be used to set off-processor values. 5726 5727 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5728 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5729 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5730 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5731 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5732 communication if it is known that only local entries will be set. 
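   Example usage (a minimal sketch; the arrays i, j, a, oi, oj, and oa are assumed to already hold the
   locally owned diagonal and off-diagonal blocks in 0-based CSR format, and x, y are vectors whose
   layouts are compatible with the matrix):

     Mat B;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&B);CHKERRQ(ierr);
     ierr = MatMult(B,x,y);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);

   After MatDestroy() the caller still owns i, j, a, oi, oj, and oa and is responsible for freeing them.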
5733 5734 .keywords: matrix, aij, compressed row, sparse, parallel 5735 5736 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5737 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5738 @*/ 5739 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5740 { 5741 PetscErrorCode ierr; 5742 Mat_MPIAIJ *maij; 5743 5744 PetscFunctionBegin; 5745 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5746 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5747 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5748 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5749 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5750 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5751 maij = (Mat_MPIAIJ*) (*mat)->data; 5752 5753 (*mat)->preallocated = PETSC_TRUE; 5754 5755 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5756 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5757 5758 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5759 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5760 5761 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5762 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5763 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5764 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5765 5766 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5767 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5768 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5769 PetscFunctionReturn(0); 5770 } 5771 5772 /* 5773 Special version for direct calls from Fortran 5774 */ 5775 #include <petsc-private/fortranimpl.h> 5776 5777 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5778 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5779 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5780 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5781 #endif 5782 5783 /* Change these macros so can be used in void function */ 5784 #undef CHKERRQ 5785 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5786 #undef SETERRQ2 5787 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5788 #undef SETERRQ3 5789 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5790 #undef SETERRQ 5791 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5792 5793 #undef __FUNCT__ 5794 #define __FUNCT__ "matsetvaluesmpiaij_" 5795 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5796 { 5797 Mat mat = *mmat; 5798 PetscInt m = *mm, n = *mn; 5799 InsertMode addv = *maddv; 5800 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5801 PetscScalar value; 5802 PetscErrorCode ierr; 5803 5804 MatCheckPreallocated(mat,1); 5805 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5806 5807 #if defined(PETSC_USE_DEBUG) 5808 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5809 #endif 5810 { 5811 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5812 
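/* the body below essentially duplicates MatSetValues_MPIAIJ() so that Fortran can call this routine
     directly, bypassing the MatSetValues() function-pointer dispatch; errors abort through the
     CHKERRABORT-based macros redefined above because this is a void function */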
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5813 PetscBool roworiented = aij->roworiented; 5814 5815 /* Some Variables required in the macro */ 5816 Mat A = aij->A; 5817 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5818 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5819 MatScalar *aa = a->a; 5820 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5821 Mat B = aij->B; 5822 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5823 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5824 MatScalar *ba = b->a; 5825 5826 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5827 PetscInt nonew = a->nonew; 5828 MatScalar *ap1,*ap2; 5829 5830 PetscFunctionBegin; 5831 for (i=0; i<m; i++) { 5832 if (im[i] < 0) continue; 5833 #if defined(PETSC_USE_DEBUG) 5834 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5835 #endif 5836 if (im[i] >= rstart && im[i] < rend) { 5837 row = im[i] - rstart; 5838 lastcol1 = -1; 5839 rp1 = aj + ai[row]; 5840 ap1 = aa + ai[row]; 5841 rmax1 = aimax[row]; 5842 nrow1 = ailen[row]; 5843 low1 = 0; 5844 high1 = nrow1; 5845 lastcol2 = -1; 5846 rp2 = bj + bi[row]; 5847 ap2 = ba + bi[row]; 5848 rmax2 = bimax[row]; 5849 nrow2 = bilen[row]; 5850 low2 = 0; 5851 high2 = nrow2; 5852 5853 for (j=0; j<n; j++) { 5854 if (roworiented) value = v[i*n+j]; 5855 else value = v[i+j*m]; 5856 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5857 if (in[j] >= cstart && in[j] < cend) { 5858 col = in[j] - cstart; 5859 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5860 } else if (in[j] < 0) continue; 5861 #if defined(PETSC_USE_DEBUG) 5862 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5863 #endif 5864 else { 5865 if (mat->was_assembled) { 5866 if (!aij->colmap) { 5867 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5868 } 5869 #if defined(PETSC_USE_CTABLE) 5870 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5871 col--; 5872 #else 5873 col = aij->colmap[in[j]] - 1; 5874 #endif 5875 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5876 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5877 col = in[j]; 5878 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5879 B = aij->B; 5880 b = (Mat_SeqAIJ*)B->data; 5881 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5882 rp2 = bj + bi[row]; 5883 ap2 = ba + bi[row]; 5884 rmax2 = bimax[row]; 5885 nrow2 = bilen[row]; 5886 low2 = 0; 5887 high2 = nrow2; 5888 bm = aij->B->rmap->n; 5889 ba = b->a; 5890 } 5891 } else col = in[j]; 5892 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5893 } 5894 } 5895 } else if (!aij->donotstash) { 5896 if (roworiented) { 5897 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5898 } else { 5899 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5900 } 5901 } 5902 } 5903 } 5904 PetscFunctionReturnVoid(); 5905 } 5906 5907