1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the local number in the off-diagonal part of the local 354 storage 
of the matrix. When PETSC_USE_CTABLE is used this is scalable at 355 a slightly higher hash table cost; without it it is not scalable (each processor 356 has an order N integer array but is fast to acess. 357 */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 361 { 362 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt n = aij->B->cmap->n,i; 365 366 PetscFunctionBegin; 367 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 368 #if defined(PETSC_USE_CTABLE) 369 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 370 for (i=0; i<n; i++) { 371 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 372 } 373 #else 374 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 375 ierr = PetscLogObjectMemory((PetscObject)mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 378 #endif 379 PetscFunctionReturn(0); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 383 { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col;\ 387 while (high1-low1 > 5) { \ 388 t = (low1+high1)/2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i=low1; _i<high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) ap1[_i] += value; \ 396 else ap1[_i] = value; \ 397 goto a_noinsert; \ 398 } \ 399 } \ 400 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 401 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 402 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 403 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 404 N = nrow1++ - 1; a->nz++; high1++; \ 405 /* shift up all the later entries in this row */ \ 406 for (ii=N; ii>=_i; ii--) { \ 407 rp1[ii+1] = rp1[ii]; \ 408 ap1[ii+1] = ap1[ii]; \ 409 } \ 410 rp1[_i] = col; \ 411 ap1[_i] = value; \ 412 a_noinsert: ; \ 413 ailen[row] = nrow1; \ 414 } 415 416 417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 418 { \ 419 if (col <= lastcol2) low2 = 0; \ 420 else high2 = nrow2; \ 421 lastcol2 = col; \ 422 while (high2-low2 > 5) { \ 423 t = (low2+high2)/2; \ 424 if (rp2[t] > col) high2 = t; \ 425 else low2 = t; \ 426 } \ 427 for (_i=low2; _i<high2; _i++) { \ 428 if (rp2[_i] > col) break; \ 429 if (rp2[_i] == col) { \ 430 if (addv == ADD_VALUES) ap2[_i] += value; \ 431 else ap2[_i] = value; \ 432 goto b_noinsert; \ 433 } \ 434 } \ 435 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 438 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 439 N = nrow2++ - 1; b->nz++; high2++; \ 440 /* shift up all the later entries in this row */ \ 441 for (ii=N; ii>=_i; ii--) { \ 442 rp2[ii+1] = rp2[ii]; \ 443 ap2[ii+1] = ap2[ii]; \ 444 } \ 445 rp2[_i] = col; \ 446 ap2[_i] = value; \ 447 b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 
#undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 546 ierr = 
MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 636 
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. 
*/ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 714 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 715 716 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 717 718 aij->rowvalues = 0; 719 720 /* used by MatAXPY() */ 721 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 722 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 723 724 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 725 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 732 { 733 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 734 PetscErrorCode ierr; 735 736 PetscFunctionBegin; 737 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 738 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatZeroRows_MPIAIJ" 744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 745 { 746 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 747 PetscInt *owners = A->rmap->range; 748 PetscInt n = A->rmap->n; 749 PetscMPIInt size = mat->size; 750 PetscSF sf; 751 PetscInt *lrows; 752 PetscSFNode *rrows; 753 PetscInt lastidx = -1, r, p = 0, len = 0; 754 PetscErrorCode ierr; 755 756 PetscFunctionBegin; 757 /* Create SF where leaves are input rows and roots are owned rows */ 758 ierr = PetscMalloc(n * sizeof(PetscInt), &lrows);CHKERRQ(ierr); 759 for (r = 0; r < n; ++r) lrows[r] = -1; 760 ierr = PetscMalloc(N * sizeof(PetscSFNode), &rrows);CHKERRQ(ierr); 761 for (r = 0; r < N; ++r) { 762 const PetscInt idx = rows[r]; 763 PetscBool found = PETSC_FALSE; 764 /* Trick for efficient searching for sorted rows */ 765 if (lastidx > idx) p = 0; 766 lastidx = idx; 767 for (; p < size; ++p) { 768 if (idx >= owners[p] && idx < owners[p+1]) { 769 rrows[r].rank = p; 770 rrows[r].index = rows[r] - owners[p]; 771 found = PETSC_TRUE; 772 break; 773 } 774 } 775 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 776 } 777 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 778 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 779 /* Collect flags for rows to be zeroed */ 780 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 781 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 782 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 783 /* Compress and put in row numbers */ 784 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 785 /* fix right hand side if needed */ 786 if (x && b) { 787 const PetscScalar *xx; 788 PetscScalar *bb; 789 790 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 791 ierr = 
VecGetArray(b, &bb);CHKERRQ(ierr); 792 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 793 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 794 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 795 } 796 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 797 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 798 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 799 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 800 } else if (diag != 0.0) { 801 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 802 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 803 for (r = 0; r < len; ++r) { 804 const PetscInt row = lrows[r] + A->rmap->rstart; 805 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 806 } 807 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 808 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 809 } else { 810 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 811 } 812 ierr = PetscFree(lrows);CHKERRQ(ierr); 813 PetscFunctionReturn(0); 814 } 815 816 #undef __FUNCT__ 817 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 818 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 819 { 820 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 821 PetscErrorCode ierr; 822 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 823 PetscInt i,*owners = A->rmap->range; 824 PetscInt *nprocs,j,idx,nsends; 825 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 826 PetscInt *rvalues,count,base,slen,*source; 827 PetscInt *lens,*lrows,*values,m; 828 MPI_Comm comm; 829 MPI_Request *send_waits,*recv_waits; 830 MPI_Status recv_status,*send_status; 831 const PetscScalar *xx; 832 PetscScalar *bb,*mask; 833 Vec xmask,lmask; 834 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 835 const PetscInt *aj, *ii,*ridx; 836 PetscScalar *aa; 837 #if defined(PETSC_DEBUG) 838 PetscBool found = PETSC_FALSE; 839 #endif 840 841 PetscFunctionBegin; 842 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 843 /* first count number of contributors to each processor */ 844 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 845 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 846 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 847 j = 0; 848 for (i=0; i<N; i++) { 849 if (lastidx > (idx = rows[i])) j = 0; 850 lastidx = idx; 851 for (; j<size; j++) { 852 if (idx >= owners[j] && idx < owners[j+1]) { 853 nprocs[2*j]++; 854 nprocs[2*j+1] = 1; 855 owner[i] = j; 856 #if defined(PETSC_DEBUG) 857 found = PETSC_TRUE; 858 #endif 859 break; 860 } 861 } 862 #if defined(PETSC_DEBUG) 863 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 864 found = PETSC_FALSE; 865 #endif 866 } 867 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 868 869 /* inform other processors of number of messages and max length*/ 870 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 871 872 /* post receives: */ 873 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 874 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 875 for (i=0; i<nrecvs; i++) { 
876 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 877 } 878 879 /* do sends: 880 1) starts[i] gives the starting index in svalues for stuff going to 881 the ith processor 882 */ 883 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 884 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 885 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 886 887 starts[0] = 0; 888 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 889 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 890 891 starts[0] = 0; 892 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 893 count = 0; 894 for (i=0; i<size; i++) { 895 if (nprocs[2*i+1]) { 896 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 897 } 898 } 899 ierr = PetscFree(starts);CHKERRQ(ierr); 900 901 base = owners[rank]; 902 903 /* wait on receives */ 904 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 905 count = nrecvs; slen = 0; 906 while (count) { 907 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 908 /* unpack receives into our local space */ 909 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 910 911 source[imdex] = recv_status.MPI_SOURCE; 912 lens[imdex] = n; 913 slen += n; 914 count--; 915 } 916 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 917 918 /* move the data into the send scatter */ 919 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 920 count = 0; 921 for (i=0; i<nrecvs; i++) { 922 values = rvalues + i*nmax; 923 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 924 } 925 ierr = PetscFree(rvalues);CHKERRQ(ierr); 926 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 927 ierr = PetscFree(owner);CHKERRQ(ierr); 928 ierr = PetscFree(nprocs);CHKERRQ(ierr); 929 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 930 931 /* zero diagonal part of matrix */ 932 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 933 934 /* handle off diagonal part of matrix */ 935 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 936 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 937 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 938 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 939 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 940 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 943 if (x) { 944 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 947 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 948 } 949 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 950 951 /* remove zeroed rows of off diagonal matrix */ 952 ii = aij->i; 953 for (i=0; i<slen; i++) { 954 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 955 } 956 957 /* loop over all elements of off process part of matrix zeroing removed columns*/ 958 if (aij->compressedrow.use) { 959 m = aij->compressedrow.nrows; 960 ii = aij->compressedrow.i; 961 ridx = aij->compressedrow.rindex; 962 for (i=0; i<m; i++) { 963 n = ii[i+1] - ii[i]; 964 aj = aij->j + ii[i]; 965 aa = aij->a + ii[i]; 966 967 for (j=0; j<n; j++) 
{ 968 if (PetscAbsScalar(mask[*aj])) { 969 if (b) bb[*ridx] -= *aa*xx[*aj]; 970 *aa = 0.0; 971 } 972 aa++; 973 aj++; 974 } 975 ridx++; 976 } 977 } else { /* do not use compressed row format */ 978 m = l->B->rmap->n; 979 for (i=0; i<m; i++) { 980 n = ii[i+1] - ii[i]; 981 aj = aij->j + ii[i]; 982 aa = aij->a + ii[i]; 983 for (j=0; j<n; j++) { 984 if (PetscAbsScalar(mask[*aj])) { 985 if (b) bb[i] -= *aa*xx[*aj]; 986 *aa = 0.0; 987 } 988 aa++; 989 aj++; 990 } 991 } 992 } 993 if (x) { 994 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 995 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 996 } 997 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 998 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 999 ierr = PetscFree(lrows);CHKERRQ(ierr); 1000 1001 /* wait on sends */ 1002 if (nsends) { 1003 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1004 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1005 ierr = PetscFree(send_status);CHKERRQ(ierr); 1006 } 1007 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1008 ierr = PetscFree(svalues);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatMult_MPIAIJ" 1014 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1015 { 1016 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1017 PetscErrorCode ierr; 1018 PetscInt nt; 1019 1020 PetscFunctionBegin; 1021 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1022 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1023 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1024 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1032 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1033 { 1034 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1035 PetscErrorCode ierr; 1036 1037 PetscFunctionBegin; 1038 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1044 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1045 { 1046 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1047 PetscErrorCode ierr; 1048 1049 PetscFunctionBegin; 1050 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1051 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1054 PetscFunctionReturn(0); 1055 } 1056 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1059 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1060 { 1061 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1062 PetscErrorCode ierr; 1063 PetscBool merged; 1064 1065 PetscFunctionBegin; 1066 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1067 /* do nondiagonal part */ 1068 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1069 if (!merged) { 1070 /* send it on its way */ 1071 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1072 /* do local part */ 1073 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1074 /* receive remote 
parts: note this assumes the values are not actually */ 1075 /* added in yy until the next line, */ 1076 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1077 } else { 1078 /* do local part */ 1079 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1080 /* send it on its way */ 1081 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1082 /* values actually were received in the Begin() but we need to call this nop */ 1083 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1090 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1091 { 1092 MPI_Comm comm; 1093 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1094 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1095 IS Me,Notme; 1096 PetscErrorCode ierr; 1097 PetscInt M,N,first,last,*notme,i; 1098 PetscMPIInt size; 1099 1100 PetscFunctionBegin; 1101 /* Easy test: symmetric diagonal block */ 1102 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1103 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1104 if (!*f) PetscFunctionReturn(0); 1105 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1106 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1107 if (size == 1) PetscFunctionReturn(0); 1108 1109 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1110 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1111 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1112 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1113 for (i=0; i<first; i++) notme[i] = i; 1114 for (i=last; i<M; i++) notme[i-last+first] = i; 1115 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1116 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1117 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1118 Aoff = Aoffs[0]; 1119 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1120 Boff = Boffs[0]; 1121 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1122 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1123 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1124 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1125 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1126 ierr = PetscFree(notme);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1132 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 /* do nondiagonal part */ 1139 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1140 /* send it on its way */ 1141 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1142 /* do local part */ 1143 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1144 /* receive remote parts */ 1145 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 /* 1150 This only works correctly for square matrices where the subblock A->A is the 1151 diagonal block 1152 */ 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1155 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1156 { 1157 PetscErrorCode 
ierr; 1158 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1159 1160 PetscFunctionBegin; 1161 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1162 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1163 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatScale_MPIAIJ" 1169 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1170 { 1171 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1172 PetscErrorCode ierr; 1173 1174 PetscFunctionBegin; 1175 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1176 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatDestroy_MPIAIJ" 1182 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1183 { 1184 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1185 PetscErrorCode ierr; 1186 1187 PetscFunctionBegin; 1188 #if defined(PETSC_USE_LOG) 1189 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1190 #endif 1191 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1192 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1193 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1194 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1195 #if defined(PETSC_USE_CTABLE) 1196 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1197 #else 1198 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1199 #endif 1200 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1201 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1202 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1203 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1204 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1205 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1206 1207 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1208 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1209 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1210 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1211 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1212 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1221 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1222 { 1223 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1224 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1225 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1226 PetscErrorCode ierr; 1227 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1228 int fd; 1229 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1230 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1231 PetscScalar *column_values; 1232 PetscInt message_count,flowcontrolcount; 1233 FILE *file; 1234 1235 PetscFunctionBegin; 1236 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1237 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1238 nz = A->nz + B->nz; 1239 if (!rank) { 1240 header[0] = MAT_FILE_CLASSID; 1241 header[1] = mat->rmap->N; 1242 header[2] = mat->cmap->N; 1243 1244 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1245 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1246 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1247 /* get largest number of rows any processor has */ 1248 rlen = mat->rmap->n; 1249 range = mat->rmap->range; 1250 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1251 } else { 1252 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1253 rlen = mat->rmap->n; 1254 } 1255 1256 /* load up the local row counts */ 1257 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1258 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1259 1260 /* store the row lengths to the file */ 1261 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1262 if (!rank) { 1263 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 for (i=1; i<size; i++) { 1265 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1266 rlen = range[i+1] - range[i]; 1267 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1268 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1269 } 1270 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1271 } else { 1272 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1273 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1274 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1275 } 1276 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1277 1278 /* load up the local column indices */ 1279 nzmax = nz; /* th processor needs space a largest processor needs */ 1280 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1281 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1282 cnt = 0; 1283 for (i=0; i<mat->rmap->n; i++) { 1284 for (j=B->i[i]; j<B->i[i+1]; j++) { 1285 if ((col = garray[B->j[j]]) > cstart) break; 1286 column_indices[cnt++] = col; 1287 } 1288 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1289 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1290 } 1291 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1292 1293 /* store the column indices to the file */ 1294 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1295 if (!rank) { 1296 MPI_Status status; 1297 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1298 for (i=1; i<size; i++) { 1299 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1300 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1301 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = 
%D nzmax = %D",nz,nzmax); 1302 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1303 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1304 } 1305 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1306 } else { 1307 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1308 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1309 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1310 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1311 } 1312 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1313 1314 /* load up the local column values */ 1315 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1316 cnt = 0; 1317 for (i=0; i<mat->rmap->n; i++) { 1318 for (j=B->i[i]; j<B->i[i+1]; j++) { 1319 if (garray[B->j[j]] > cstart) break; 1320 column_values[cnt++] = B->a[j]; 1321 } 1322 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1323 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1324 } 1325 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1326 1327 /* store the column values to the file */ 1328 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1329 if (!rank) { 1330 MPI_Status status; 1331 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1332 for (i=1; i<size; i++) { 1333 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1334 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1335 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1336 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1337 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1338 } 1339 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1340 } else { 1341 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1342 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1343 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1344 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1345 } 1346 ierr = PetscFree(column_values);CHKERRQ(ierr); 1347 1348 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1349 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #include <petscdraw.h> 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1356 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1357 { 1358 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1359 PetscErrorCode ierr; 1360 PetscMPIInt rank = aij->rank,size = aij->size; 1361 PetscBool isdraw,iascii,isbinary; 1362 PetscViewer sviewer; 1363 PetscViewerFormat format; 1364 1365 PetscFunctionBegin; 1366 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1367 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1368 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1369 if (iascii) { 1370 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1371 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1372 MatInfo info; 1373 PetscBool inodes; 1374 1375 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1376 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1377 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1378 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1379 if (!inodes) { 1380 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1381 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1382 } else { 1383 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1384 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1385 } 1386 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1387 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1388 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1389 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1390 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1391 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1392 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1393 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1394 PetscFunctionReturn(0); 1395 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1396 PetscInt inodecount,inodelimit,*inodes; 1397 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1398 if (inodes) { 1399 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1400 } else { 1401 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1405 PetscFunctionReturn(0); 1406 } 1407 } else if (isbinary) { 1408 if (size == 1) { 1409 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1410 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1411 } else { 1412 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1413 } 1414 PetscFunctionReturn(0); 1415 } else if (isdraw) { 1416 PetscDraw draw; 1417 PetscBool isnull; 1418 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1419 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1420 } 1421 1422 if (size == 1) { 1423 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1424 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1425 } else { 1426 /* assemble the entire matrix onto first processor. 
*/ 1427 Mat A; 1428 Mat_SeqAIJ *Aloc; 1429 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1430 MatScalar *a; 1431 1432 if (mat->rmap->N > 1024) { 1433 PetscBool flg = PETSC_FALSE; 1434 1435 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1436 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1437 } 1438 1439 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1440 if (!rank) { 1441 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1442 } else { 1443 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1444 } 1445 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1446 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1447 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1448 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1449 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1450 1451 /* copy over the A part */ 1452 Aloc = (Mat_SeqAIJ*)aij->A->data; 1453 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1454 row = mat->rmap->rstart; 1455 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1456 for (i=0; i<m; i++) { 1457 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1458 row++; 1459 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1460 } 1461 aj = Aloc->j; 1462 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1463 1464 /* copy over the B part */ 1465 Aloc = (Mat_SeqAIJ*)aij->B->data; 1466 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1467 row = mat->rmap->rstart; 1468 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1469 ct = cols; 1470 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1471 for (i=0; i<m; i++) { 1472 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1473 row++; 1474 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1475 } 1476 ierr = PetscFree(ct);CHKERRQ(ierr); 1477 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1478 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1479 /* 1480 Everyone has to call to draw the matrix since the graphics waits are 1481 synchronized across all processors that share the PetscDraw object 1482 */ 1483 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1484 if (!rank) { 1485 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1486 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1487 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1488 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1489 } 1490 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1491 ierr = MatDestroy(&A);CHKERRQ(ierr); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatView_MPIAIJ" 1498 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1499 { 1500 PetscErrorCode ierr; 1501 PetscBool iascii,isdraw,issocket,isbinary; 1502 1503 PetscFunctionBegin; 1504 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1505 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1506 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1507 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1508 if (iascii || isdraw || isbinary || issocket) { 1509 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatSOR_MPIAIJ" 1516 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1517 { 1518 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1519 PetscErrorCode ierr; 1520 Vec bb1 = 0; 1521 PetscBool hasop; 1522 1523 PetscFunctionBegin; 1524 if (flag == SOR_APPLY_UPPER) { 1525 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1526 PetscFunctionReturn(0); 1527 } 1528 1529 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1530 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1531 } 1532 1533 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1534 if (flag & SOR_ZERO_INITIAL_GUESS) { 1535 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1536 its--; 1537 } 1538 1539 while (its--) { 1540 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1541 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1542 1543 /* update rhs: bb1 = bb - B*x */ 1544 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1545 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1546 1547 /* local sweep */ 1548 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1549 } 1550 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1551 if (flag & SOR_ZERO_INITIAL_GUESS) { 1552 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1553 its--; 1554 } 1555 while (its--) { 1556 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1557 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 1559 /* update rhs: bb1 = bb - B*x */ 1560 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1561 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1562 1563 /* local sweep */ 1564 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1565 } 1566 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1567 if (flag & SOR_ZERO_INITIAL_GUESS) { 1568 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1569 its--; 1570 } 1571 while (its--) { 1572 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 1575 /* update rhs: bb1 = bb - B*x */ 1576 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1577 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1578 1579 /* local sweep */ 1580 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1581 } 1582 } else if (flag & SOR_EISENSTAT) { 1583 Vec xx1; 1584 1585 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1586 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1587 
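/*
   Editorial sketch of the remaining steps in this SOR_EISENSTAT branch, derived only from the
   statements below (D denotes the matrix diagonal mat->diag, B the off-diagonal block mat->B):
      lvec <- off-process entries of xx                      (the single scatter started next)
      bb1  <- bb + ((omega-2)/omega)*D*xx + B*lvec
      xx1  <- one local forward sweep on bb1 with zero initial guess
      xx   <- xx + xx1
   so the Eisenstat variant completes with only that one communication step.
*/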
1588 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1589 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 if (!mat->diag) { 1591 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1592 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1593 } 1594 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1595 if (hasop) { 1596 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1597 } else { 1598 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1599 } 1600 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1601 1602 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1603 1604 /* local sweep */ 1605 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1606 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1607 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1608 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1609 1610 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1611 PetscFunctionReturn(0); 1612 } 1613 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatPermute_MPIAIJ" 1616 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1617 { 1618 Mat aA,aB,Aperm; 1619 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1620 PetscScalar *aa,*ba; 1621 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1622 PetscSF rowsf,sf; 1623 IS parcolp = NULL; 1624 PetscBool done; 1625 PetscErrorCode ierr; 1626 1627 PetscFunctionBegin; 1628 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1629 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1630 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1631 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1632 1633 /* Invert row permutation to find out where my rows should go */ 1634 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1635 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1636 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1637 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1638 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1639 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1640 1641 /* Invert column permutation to find out where my columns should go */ 1642 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1643 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1644 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1645 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1646 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1647 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1648 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1649 1650 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1651 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1652 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1653 1654 /* Find out where my gcols should go */ 1655 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1656 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1657 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1658 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1659 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1660 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1661 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1662 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1663 1664 ierr = PetscMalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1665 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1666 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1667 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1668 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1669 for (i=0; i<m; i++) { 1670 PetscInt row = rdest[i],rowner; 1671 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1672 for (j=ai[i]; j<ai[i+1]; j++) { 1673 PetscInt cowner,col = cdest[aj[j]]; 1674 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1675 if (rowner == cowner) dnnz[i]++; 1676 else onnz[i]++; 1677 } 1678 for (j=bi[i]; j<bi[i+1]; j++) { 1679 PetscInt cowner,col = gcdest[bj[j]]; 1680 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1681 if (rowner == cowner) dnnz[i]++; 1682 else onnz[i]++; 1683 } 1684 } 1685 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1686 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1687 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1688 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1689 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1690 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1691 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1692 1693 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1694 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1695 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1696 for (i=0; i<m; i++) { 1697 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1698 PetscInt rowlen; 1699 rowlen = ai[i+1] - ai[i]; 1700 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1701 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1702 rowlen = bi[i+1] - bi[i]; 1703 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1704 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1705 } 1706 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1707 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1708 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1709 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1710 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1711 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1712 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1713 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1714 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1715 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1716 *B = Aperm; 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1722 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1723 { 1724 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1725 Mat A = mat->A,B = mat->B; 1726 PetscErrorCode ierr; 1727 PetscReal isend[5],irecv[5]; 1728 1729 PetscFunctionBegin; 1730 info->block_size = 1.0; 1731 ierr = 
MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1732 1733 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1734 isend[3] = info->memory; isend[4] = info->mallocs; 1735 1736 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1737 1738 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1739 isend[3] += info->memory; isend[4] += info->mallocs; 1740 if (flag == MAT_LOCAL) { 1741 info->nz_used = isend[0]; 1742 info->nz_allocated = isend[1]; 1743 info->nz_unneeded = isend[2]; 1744 info->memory = isend[3]; 1745 info->mallocs = isend[4]; 1746 } else if (flag == MAT_GLOBAL_MAX) { 1747 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1748 1749 info->nz_used = irecv[0]; 1750 info->nz_allocated = irecv[1]; 1751 info->nz_unneeded = irecv[2]; 1752 info->memory = irecv[3]; 1753 info->mallocs = irecv[4]; 1754 } else if (flag == MAT_GLOBAL_SUM) { 1755 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1756 1757 info->nz_used = irecv[0]; 1758 info->nz_allocated = irecv[1]; 1759 info->nz_unneeded = irecv[2]; 1760 info->memory = irecv[3]; 1761 info->mallocs = irecv[4]; 1762 } 1763 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1764 info->fill_ratio_needed = 0; 1765 info->factor_mallocs = 0; 1766 PetscFunctionReturn(0); 1767 } 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatSetOption_MPIAIJ" 1771 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1772 { 1773 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1774 PetscErrorCode ierr; 1775 1776 PetscFunctionBegin; 1777 switch (op) { 1778 case MAT_NEW_NONZERO_LOCATIONS: 1779 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1780 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1781 case MAT_KEEP_NONZERO_PATTERN: 1782 case MAT_NEW_NONZERO_LOCATION_ERR: 1783 case MAT_USE_INODES: 1784 case MAT_IGNORE_ZERO_ENTRIES: 1785 MatCheckPreallocated(A,1); 1786 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1787 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1788 break; 1789 case MAT_ROW_ORIENTED: 1790 a->roworiented = flg; 1791 1792 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1793 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1794 break; 1795 case MAT_NEW_DIAGONALS: 1796 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1797 break; 1798 case MAT_IGNORE_OFF_PROC_ENTRIES: 1799 a->donotstash = flg; 1800 break; 1801 case MAT_SPD: 1802 A->spd_set = PETSC_TRUE; 1803 A->spd = flg; 1804 if (flg) { 1805 A->symmetric = PETSC_TRUE; 1806 A->structurally_symmetric = PETSC_TRUE; 1807 A->symmetric_set = PETSC_TRUE; 1808 A->structurally_symmetric_set = PETSC_TRUE; 1809 } 1810 break; 1811 case MAT_SYMMETRIC: 1812 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1813 break; 1814 case MAT_STRUCTURALLY_SYMMETRIC: 1815 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_HERMITIAN: 1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_SYMMETRY_ETERNAL: 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 break; 1823 default: 1824 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1825 } 1826 PetscFunctionReturn(0); 1827 } 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatGetRow_MPIAIJ" 1831 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1832 { 1833 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1834 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1835 PetscErrorCode 
ierr; 1836 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1837 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1838 PetscInt *cmap,*idx_p; 1839 1840 PetscFunctionBegin; 1841 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1842 mat->getrowactive = PETSC_TRUE; 1843 1844 if (!mat->rowvalues && (idx || v)) { 1845 /* 1846 allocate enough space to hold information from the longest row. 1847 */ 1848 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1849 PetscInt max = 1,tmp; 1850 for (i=0; i<matin->rmap->n; i++) { 1851 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1852 if (max < tmp) max = tmp; 1853 } 1854 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1855 } 1856 1857 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1858 lrow = row - rstart; 1859 1860 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1861 if (!v) {pvA = 0; pvB = 0;} 1862 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1863 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1864 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1865 nztot = nzA + nzB; 1866 1867 cmap = mat->garray; 1868 if (v || idx) { 1869 if (nztot) { 1870 /* Sort by increasing column numbers, assuming A and B already sorted */ 1871 PetscInt imark = -1; 1872 if (v) { 1873 *v = v_p = mat->rowvalues; 1874 for (i=0; i<nzB; i++) { 1875 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1876 else break; 1877 } 1878 imark = i; 1879 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1880 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1881 } 1882 if (idx) { 1883 *idx = idx_p = mat->rowindices; 1884 if (imark > -1) { 1885 for (i=0; i<imark; i++) { 1886 idx_p[i] = cmap[cworkB[i]]; 1887 } 1888 } else { 1889 for (i=0; i<nzB; i++) { 1890 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1891 else break; 1892 } 1893 imark = i; 1894 } 1895 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1896 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1897 } 1898 } else { 1899 if (idx) *idx = 0; 1900 if (v) *v = 0; 1901 } 1902 } 1903 *nz = nztot; 1904 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1905 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 #undef __FUNCT__ 1910 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1911 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1912 { 1913 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1914 1915 PetscFunctionBegin; 1916 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1917 aij->getrowactive = PETSC_FALSE; 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatNorm_MPIAIJ" 1923 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1924 { 1925 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1926 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1927 PetscErrorCode ierr; 1928 PetscInt i,j,cstart = mat->cmap->rstart; 1929 PetscReal sum = 0.0; 1930 MatScalar *v; 1931 1932 PetscFunctionBegin; 1933 if (aij->size == 1) { 1934 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1935 } else { 1936 if (type == NORM_FROBENIUS) { 1937 v = amat->a; 1938 for (i=0; i<amat->nz; i++) { 1939 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++; 1940 } 1941 v = bmat->a; 1942 for (i=0; i<bmat->nz; i++) { 1943 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1944 } 1945 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1946 *norm = PetscSqrtReal(*norm); 1947 } else if (type == NORM_1) { /* max column norm */ 1948 PetscReal *tmp,*tmp2; 1949 PetscInt *jj,*garray = aij->garray; 1950 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1951 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1952 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1953 *norm = 0.0; 1954 v = amat->a; jj = amat->j; 1955 for (j=0; j<amat->nz; j++) { 1956 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1957 } 1958 v = bmat->a; jj = bmat->j; 1959 for (j=0; j<bmat->nz; j++) { 1960 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1961 } 1962 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1963 for (j=0; j<mat->cmap->N; j++) { 1964 if (tmp2[j] > *norm) *norm = tmp2[j]; 1965 } 1966 ierr = PetscFree(tmp);CHKERRQ(ierr); 1967 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1968 } else if (type == NORM_INFINITY) { /* max row norm */ 1969 PetscReal ntemp = 0.0; 1970 for (j=0; j<aij->A->rmap->n; j++) { 1971 v = amat->a + amat->i[j]; 1972 sum = 0.0; 1973 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1974 sum += PetscAbsScalar(*v); v++; 1975 } 1976 v = bmat->a + bmat->i[j]; 1977 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1978 sum += PetscAbsScalar(*v); v++; 1979 } 1980 if (sum > ntemp) ntemp = sum; 1981 } 1982 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1983 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1984 } 1985 PetscFunctionReturn(0); 1986 } 1987 1988 #undef __FUNCT__ 1989 #define __FUNCT__ "MatTranspose_MPIAIJ" 1990 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1991 { 1992 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1993 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1994 PetscErrorCode ierr; 1995 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1996 PetscInt cstart = A->cmap->rstart,ncol; 1997 Mat B; 1998 MatScalar *array; 1999 2000 PetscFunctionBegin; 2001 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2002 2003 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2004 ai = Aloc->i; aj = Aloc->j; 2005 bi = Bloc->i; bj = Bloc->j; 2006 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2007 PetscInt *d_nnz,*g_nnz,*o_nnz; 2008 PetscSFNode *oloc; 2009 PETSC_UNUSED PetscSF sf; 2010 2011 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2012 /* compute d_nnz for preallocation */ 2013 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2014 for (i=0; i<ai[ma]; i++) { 2015 d_nnz[aj[i]]++; 2016 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2017 } 2018 /* compute local off-diagonal contributions */ 2019 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2020 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2021 /* map those to global */ 2022 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2023 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2024 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2025 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2026 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2027 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2028 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2029 2030 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2031 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2032 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2033 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2034 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2035 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2036 } else { 2037 B = *matout; 2038 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2039 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2040 } 2041 2042 /* copy over the A part */ 2043 array = Aloc->a; 2044 row = A->rmap->rstart; 2045 for (i=0; i<ma; i++) { 2046 ncol = ai[i+1]-ai[i]; 2047 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2048 row++; 2049 array += ncol; aj += ncol; 2050 } 2051 aj = Aloc->j; 2052 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2053 2054 /* copy over the B part */ 2055 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2056 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2057 array = Bloc->a; 2058 row = A->rmap->rstart; 2059 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2060 cols_tmp = cols; 2061 for (i=0; i<mb; i++) { 2062 ncol = bi[i+1]-bi[i]; 2063 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2064 row++; 2065 array += ncol; cols_tmp += ncol; 2066 } 2067 ierr = PetscFree(cols);CHKERRQ(ierr); 2068 2069 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2070 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2071 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2072 *matout = B; 2073 } else { 2074 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2075 } 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2081 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2082 { 2083 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2084 Mat a = aij->A,b = aij->B; 2085 PetscErrorCode ierr; 2086 PetscInt s1,s2,s3; 2087 2088 PetscFunctionBegin; 2089 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2090 if (rr) { 2091 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2092 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2093 /* Overlap communication with computation. 
*/ 2094 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2095 } 2096 if (ll) { 2097 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2098 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2099 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2100 } 2101 /* scale the diagonal block */ 2102 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2103 2104 if (rr) { 2105 /* Do a scatter end and then right scale the off-diagonal block */ 2106 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2107 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2108 } 2109 PetscFunctionReturn(0); 2110 } 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2114 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2115 { 2116 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2117 PetscErrorCode ierr; 2118 2119 PetscFunctionBegin; 2120 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2121 PetscFunctionReturn(0); 2122 } 2123 2124 #undef __FUNCT__ 2125 #define __FUNCT__ "MatEqual_MPIAIJ" 2126 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2127 { 2128 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2129 Mat a,b,c,d; 2130 PetscBool flg; 2131 PetscErrorCode ierr; 2132 2133 PetscFunctionBegin; 2134 a = matA->A; b = matA->B; 2135 c = matB->A; d = matB->B; 2136 2137 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2138 if (flg) { 2139 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2140 } 2141 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "MatCopy_MPIAIJ" 2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2152 2153 PetscFunctionBegin; 2154 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2155 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2156 /* because of the column compression in the off-processor part of the matrix a->B, 2157 the number of columns in a->B and b->B may be different, hence we cannot call 2158 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2159 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2160 then copying the submatrices */ 2161 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2164 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2165 } 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatSetUp_MPIAIJ" 2171 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2172 { 2173 PetscErrorCode ierr; 2174 2175 PetscFunctionBegin; 2176 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2177 PetscFunctionReturn(0); 2178 } 2179 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2182 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2183 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2184 { 2185 PetscInt i,m=Y->rmap->N; 2186 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2187 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2188 const PetscInt *xi = x->i,*yi = y->i; 2189 2190 PetscFunctionBegin; 2191 /* Set the number of nonzeros in the new matrix */ 2192 for (i=0; i<m; i++) { 2193 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2194 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2195 nnz[i] = 0; 2196 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2197 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2198 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2199 nnz[i]++; 2200 } 2201 for (; k<nzy; k++) nnz[i]++; 2202 } 2203 PetscFunctionReturn(0); 2204 } 2205 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "MatAXPY_MPIAIJ" 2208 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2209 { 2210 PetscErrorCode ierr; 2211 PetscInt i; 2212 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2213 PetscBLASInt bnz,one=1; 2214 Mat_SeqAIJ *x,*y; 2215 2216 PetscFunctionBegin; 2217 if (str == SAME_NONZERO_PATTERN) { 2218 PetscScalar alpha = a; 2219 x = (Mat_SeqAIJ*)xx->A->data; 2220 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2221 y = (Mat_SeqAIJ*)yy->A->data; 2222 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2223 x = (Mat_SeqAIJ*)xx->B->data; 2224 y = (Mat_SeqAIJ*)yy->B->data; 2225 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2226 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2227 } else if (str == SUBSET_NONZERO_PATTERN) { 2228 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2229 2230 x = (Mat_SeqAIJ*)xx->B->data; 2231 y = (Mat_SeqAIJ*)yy->B->data; 2232 if (y->xtoy && y->XtoY != xx->B) { 2233 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2234 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2235 } 2236 if (!y->xtoy) { /* get xtoy */ 2237 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2238 y->XtoY = xx->B; 2239 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2240 } 2241 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2242 } else { 2243 Mat B; 2244 PetscInt *nnz_d,*nnz_o; 2245 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2246 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2247 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2248 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2249 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2250 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2251 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2253 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2254 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2255 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2256 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatConjugate_MPIAIJ" 2267 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2268 { 2269 #if defined(PETSC_USE_COMPLEX) 2270 PetscErrorCode ierr; 2271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2272 2273 PetscFunctionBegin; 2274 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2275 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2276 #else 2277 PetscFunctionBegin; 2278 #endif 2279 PetscFunctionReturn(0); 2280 } 2281 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatRealPart_MPIAIJ" 2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2285 { 2286 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2291 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2297 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2298 { 2299 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2300 PetscErrorCode ierr; 2301 2302 PetscFunctionBegin; 2303 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2304 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #if defined(PETSC_HAVE_PBGL) 2309 2310 #include <boost/parallel/mpi/bsp_process_group.hpp> 2311 #include <boost/graph/distributed/ilu_default_graph.hpp> 2312 #include <boost/graph/distributed/ilu_0_block.hpp> 2313 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2314 #include <boost/graph/distributed/petsc/interface.hpp> 2315 #include <boost/multi_array.hpp> 2316 #include <boost/parallel/distributed_property_map->hpp> 2317 2318 #undef __FUNCT__ 2319 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2320 /* 2321 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2322 */ 2323 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2324 { 2325 namespace petsc = boost::distributed::petsc; 2326 2327 namespace graph_dist = boost::graph::distributed; 2328 using boost::graph::distributed::ilu_default::process_group_type; 2329 using boost::graph::ilu_permuted; 2330 2331 PetscBool row_identity, col_identity; 2332 PetscContainer c; 2333 PetscInt m, n, M, N; 2334 PetscErrorCode ierr; 2335 2336 PetscFunctionBegin; 2337 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2338 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2339 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2340 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2341 2342 process_group_type pg; 2343 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2344 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2345 lgraph_type& level_graph = *lgraph_p; 2346 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2347 2348 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2349 ilu_permuted(level_graph); 2350 2351 /* put together the new matrix */ 2352 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2353 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2354 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2355 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2356 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2357 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2358 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2359 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 2361 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2362 ierr = PetscContainerSetPointer(c, lgraph_p); 2363 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2364 ierr = PetscContainerDestroy(&c); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2370 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2371 { 2372 PetscFunctionBegin; 2373 PetscFunctionReturn(0); 2374 } 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatSolve_MPIAIJ" 2378 /* 2379 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2380 */ 2381 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2382 { 2383 namespace graph_dist = boost::graph::distributed; 2384 2385 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2386 lgraph_type *lgraph_p; 2387 PetscContainer c; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2392 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2393 ierr = VecCopy(b, x);CHKERRQ(ierr); 2394 2395 PetscScalar *array_x; 2396 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2397 PetscInt sx; 2398 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2399 2400 PetscScalar *array_b; 2401 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2402 PetscInt sb; 2403 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2404 2405 lgraph_type& level_graph = *lgraph_p; 2406 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2407 2408 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2409 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2410 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2411 2412 typedef boost::iterator_property_map<array_ref_type::iterator, 2413 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2414 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2415 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2416 2417 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2418 PetscFunctionReturn(0); 2419 } 2420 #endif 2421 2422 #undef __FUNCT__ 2423 #define __FUNCT__ "MatDestroy_MatRedundant" 2424 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2425 { 2426 PetscErrorCode ierr; 2427 Mat_Redundant *redund; 2428 PetscInt i; 2429 PetscMPIInt size; 2430 2431 PetscFunctionBegin; 2432 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2433 if (size == 1) { 2434 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2435 redund = 
a->redundant; 2436 } else { 2437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2438 redund = a->redundant; 2439 } 2440 if (redund){ 2441 if (redund->matseq) { /* via MatGetSubMatrices() */ 2442 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2443 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2444 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2445 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2446 } else { 2447 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2448 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2449 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2450 for (i=0; i<redund->nrecvs; i++) { 2451 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2452 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2453 } 2454 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2455 } 2456 2457 if (redund->psubcomm) { 2458 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2459 } 2460 ierr = redund->Destroy(A);CHKERRQ(ierr); 2461 ierr = PetscFree(redund);CHKERRQ(ierr); 2462 } 2463 PetscFunctionReturn(0); 2464 } 2465 2466 #undef __FUNCT__ 2467 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2468 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2469 { 2470 PetscMPIInt rank,size; 2471 MPI_Comm comm; 2472 PetscErrorCode ierr; 2473 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2474 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2475 PetscInt *rowrange = mat->rmap->range; 2476 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2477 Mat A = aij->A,B=aij->B,C=*matredundant; 2478 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2479 PetscScalar *sbuf_a; 2480 PetscInt nzlocal=a->nz+b->nz; 2481 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2482 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2483 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2484 MatScalar *aworkA,*aworkB; 2485 PetscScalar *vals; 2486 PetscMPIInt tag1,tag2,tag3,imdex; 2487 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2488 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2489 MPI_Status recv_status,*send_status; 2490 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2491 PetscInt **rbuf_j=NULL; 2492 PetscScalar **rbuf_a=NULL; 2493 Mat_Redundant *redund =NULL; 2494 2495 PetscFunctionBegin; 2496 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2497 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2498 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2499 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2500 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2501 2502 if (reuse == MAT_REUSE_MATRIX) { 2503 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2504 if (subsize == 1) { 2505 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2506 redund = c->redundant; 2507 } else { 2508 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2509 redund = c->redundant; 2510 } 2511 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2512 2513 nsends = redund->nsends; 2514 nrecvs = redund->nrecvs; 2515 send_rank = redund->send_rank; 2516 recv_rank = redund->recv_rank; 2517 sbuf_nz = redund->sbuf_nz; 2518 rbuf_nz = redund->rbuf_nz; 2519 sbuf_j = redund->sbuf_j; 2520 sbuf_a = redund->sbuf_a; 2521 rbuf_j = redund->rbuf_j; 2522 rbuf_a = redund->rbuf_a; 2523 } 2524 2525 if (reuse == MAT_INITIAL_MATRIX) { 2526 PetscInt nleftover,np_subcomm; 2527 2528 /* get the destination processors' id send_rank, nsends and nrecvs */ 2529 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2530 2531 np_subcomm = size/nsubcomm; 2532 nleftover = size - nsubcomm*np_subcomm; 2533 2534 /* block of codes below is specific for INTERLACED */ 2535 /* ------------------------------------------------*/ 2536 nsends = 0; nrecvs = 0; 2537 for (i=0; i<size; i++) { 2538 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2539 send_rank[nsends++] = i; 2540 recv_rank[nrecvs++] = i; 2541 } 2542 } 2543 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2544 i = size-nleftover-1; 2545 j = 0; 2546 while (j < nsubcomm - nleftover) { 2547 send_rank[nsends++] = i; 2548 i--; j++; 2549 } 2550 } 2551 2552 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2553 for (i=0; i<nleftover; i++) { 2554 recv_rank[nrecvs++] = size-nleftover+i; 2555 } 2556 } 2557 /*----------------------------------------------*/ 2558 2559 /* allocate sbuf_j, sbuf_a */ 2560 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2561 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2562 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2563 /* 2564 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2565 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2566 */ 2567 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2568 2569 /* copy mat's local entries into the buffers */ 2570 if (reuse == MAT_INITIAL_MATRIX) { 2571 rownz_max = 0; 2572 rptr = sbuf_j; 2573 cols = sbuf_j + rend-rstart + 1; 2574 vals = sbuf_a; 2575 rptr[0] = 0; 2576 for (i=0; i<rend-rstart; i++) { 2577 row = i + rstart; 2578 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2579 ncols = nzA + nzB; 2580 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2581 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2582 /* load the column indices for this row into cols */ 2583 lwrite = 0; 2584 for (l=0; l<nzB; l++) { 2585 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2586 vals[lwrite] = aworkB[l]; 2587 cols[lwrite++] = ctmp; 2588 } 2589 } 2590 for (l=0; l<nzA; l++) { 2591 vals[lwrite] = aworkA[l]; 2592 cols[lwrite++] = cstart + cworkA[l]; 2593 } 2594 for (l=0; l<nzB; l++) { 2595 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2596 vals[lwrite] = aworkB[l]; 2597 cols[lwrite++] = ctmp; 2598 } 2599 } 2600 vals += ncols; 2601 cols += ncols; 2602 rptr[i+1] = rptr[i] + ncols; 2603 if (rownz_max < ncols) rownz_max = ncols; 2604 } 2605 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2606 } else { /* only copy matrix values into sbuf_a */ 2607 rptr = sbuf_j; 2608 vals = sbuf_a; 2609 rptr[0] = 0; 2610 for (i=0; i<rend-rstart; i++) { 2611 row = i + rstart; 2612 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2613 ncols = nzA + nzB; 2614 cworkB = b->j + b->i[i]; 2615 aworkA = a->a + a->i[i]; 2616 aworkB = b->a + 
b->i[i]; 2617 lwrite = 0; 2618 for (l=0; l<nzB; l++) { 2619 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2620 } 2621 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2622 for (l=0; l<nzB; l++) { 2623 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2624 } 2625 vals += ncols; 2626 rptr[i+1] = rptr[i] + ncols; 2627 } 2628 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2629 2630 /* send nzlocal to others, and recv other's nzlocal */ 2631 /*--------------------------------------------------*/ 2632 if (reuse == MAT_INITIAL_MATRIX) { 2633 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2634 2635 s_waits2 = s_waits3 + nsends; 2636 s_waits1 = s_waits2 + nsends; 2637 r_waits1 = s_waits1 + nsends; 2638 r_waits2 = r_waits1 + nrecvs; 2639 r_waits3 = r_waits2 + nrecvs; 2640 } else { 2641 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2642 2643 r_waits3 = s_waits3 + nsends; 2644 } 2645 2646 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2647 if (reuse == MAT_INITIAL_MATRIX) { 2648 /* get new tags to keep the communication clean */ 2649 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2650 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2651 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2652 2653 /* post receives of other's nzlocal */ 2654 for (i=0; i<nrecvs; i++) { 2655 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2656 } 2657 /* send nzlocal to others */ 2658 for (i=0; i<nsends; i++) { 2659 sbuf_nz[i] = nzlocal; 2660 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2661 } 2662 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2663 count = nrecvs; 2664 while (count) { 2665 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2666 2667 recv_rank[imdex] = recv_status.MPI_SOURCE; 2668 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2669 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2670 2671 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2672 2673 rbuf_nz[imdex] += i + 2; 2674 2675 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2676 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2677 count--; 2678 } 2679 /* wait on sends of nzlocal */ 2680 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2681 /* send mat->i,j to others, and recv from other's */ 2682 /*------------------------------------------------*/ 2683 for (i=0; i<nsends; i++) { 2684 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2685 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2686 } 2687 /* wait on receives of mat->i,j */ 2688 /*------------------------------*/ 2689 count = nrecvs; 2690 while (count) { 2691 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2692 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2693 count--; 2694 } 2695 /* wait on sends of mat->i,j */ 2696 /*---------------------------*/ 2697 if (nsends) { 2698 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2699 
} 2700 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2701 2702 /* post receives, send and receive mat->a */ 2703 /*----------------------------------------*/ 2704 for (imdex=0; imdex<nrecvs; imdex++) { 2705 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2706 } 2707 for (i=0; i<nsends; i++) { 2708 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2709 } 2710 count = nrecvs; 2711 while (count) { 2712 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2713 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2714 count--; 2715 } 2716 if (nsends) { 2717 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2718 } 2719 2720 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2721 2722 /* create redundant matrix */ 2723 /*-------------------------*/ 2724 if (reuse == MAT_INITIAL_MATRIX) { 2725 const PetscInt *range; 2726 PetscInt rstart_sub,rend_sub,mloc_sub; 2727 2728 /* compute rownz_max for preallocation */ 2729 for (imdex=0; imdex<nrecvs; imdex++) { 2730 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2731 rptr = rbuf_j[imdex]; 2732 for (i=0; i<j; i++) { 2733 ncols = rptr[i+1] - rptr[i]; 2734 if (rownz_max < ncols) rownz_max = ncols; 2735 } 2736 } 2737 2738 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2739 2740 /* get local size of redundant matrix 2741 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2742 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2743 rstart_sub = range[nsubcomm*subrank]; 2744 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2745 rend_sub = range[nsubcomm*(subrank+1)]; 2746 } else { 2747 rend_sub = mat->rmap->N; 2748 } 2749 mloc_sub = rend_sub - rstart_sub; 2750 2751 if (M == N) { 2752 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2753 } else { /* non-square matrix */ 2754 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2755 } 2756 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2757 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2758 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2759 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2760 } else { 2761 C = *matredundant; 2762 } 2763 2764 /* insert local matrix entries */ 2765 rptr = sbuf_j; 2766 cols = sbuf_j + rend-rstart + 1; 2767 vals = sbuf_a; 2768 for (i=0; i<rend-rstart; i++) { 2769 row = i + rstart; 2770 ncols = rptr[i+1] - rptr[i]; 2771 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2772 vals += ncols; 2773 cols += ncols; 2774 } 2775 /* insert received matrix entries */ 2776 for (imdex=0; imdex<nrecvs; imdex++) { 2777 rstart = rowrange[recv_rank[imdex]]; 2778 rend = rowrange[recv_rank[imdex]+1]; 2779 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2780 rptr = rbuf_j[imdex]; 2781 cols = rbuf_j[imdex] + rend-rstart + 1; 2782 vals = rbuf_a[imdex]; 2783 for (i=0; i<rend-rstart; i++) { 2784 row = i + rstart; 2785 ncols = rptr[i+1] - rptr[i]; 2786 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2787 vals += ncols; 2788 cols += ncols; 2789 } 2790 } 2791 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2792 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2793 2794 
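/*
   Editorial note (sketch of the reuse mechanism, based on the surrounding code): when called with
   MAT_INITIAL_MATRIX, the block below stores the communication plan and buffers built above
   (send_rank/recv_rank, sbuf_nz/rbuf_nz, sbuf_j/sbuf_a, rbuf_j/rbuf_a) in a Mat_Redundant struct
   attached to C, so that later MAT_REUSE_MATRIX calls can skip that setup and exchange only the
   numerical values; MatDestroy_MatRedundant() frees everything cached here.
*/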
if (reuse == MAT_INITIAL_MATRIX) {
2795 *matredundant = C;
2796
2797 /* create a supporting struct and attach it to C for reuse */
2798 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2799 if (subsize == 1) {
2800 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2801 c->redundant = redund;
2802 } else {
2803 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2804 c->redundant = redund;
2805 }
2806
2807 redund->nzlocal = nzlocal;
2808 redund->nsends = nsends;
2809 redund->nrecvs = nrecvs;
2810 redund->send_rank = send_rank;
2811 redund->recv_rank = recv_rank;
2812 redund->sbuf_nz = sbuf_nz;
2813 redund->rbuf_nz = rbuf_nz;
2814 redund->sbuf_j = sbuf_j;
2815 redund->sbuf_a = sbuf_a;
2816 redund->rbuf_j = rbuf_j;
2817 redund->rbuf_a = rbuf_a;
2818 redund->psubcomm = NULL;
2819
2820 redund->Destroy = C->ops->destroy;
2821 C->ops->destroy = MatDestroy_MatRedundant;
2822 }
2823 PetscFunctionReturn(0);
2824 }
2825
2826 #undef __FUNCT__
2827 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2828 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2829 {
2830 PetscErrorCode ierr;
2831 MPI_Comm comm;
2832 PetscMPIInt size,subsize;
2833 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2834 Mat_Redundant *redund=NULL;
2835 PetscSubcomm psubcomm=NULL;
2836 MPI_Comm subcomm_in=subcomm;
2837 Mat *matseq;
2838 IS isrow,iscol;
2839
2840 PetscFunctionBegin;
2841 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2842 if (reuse == MAT_INITIAL_MATRIX) {
2843 /* create psubcomm, then get subcomm */
2844 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2845 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2846 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2847
2848 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2849 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2850 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2851 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2852 subcomm = psubcomm->comm;
2853 } else { /* retrieve psubcomm and subcomm */
2854 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2855 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2856 if (subsize == 1) {
2857 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2858 redund = c->redundant;
2859 } else {
2860 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2861 redund = c->redundant;
2862 }
2863 psubcomm = redund->psubcomm;
2864 }
2865 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2866 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2867 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2868 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2869 if (subsize == 1) {
2870 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2871 c->redundant->psubcomm = psubcomm;
2872 } else {
2873 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2874 c->redundant->psubcomm = psubcomm;
2875 }
2876 }
2877 PetscFunctionReturn(0);
2878 }
2879 }
2880
2881 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2882 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2883 if (reuse == MAT_INITIAL_MATRIX) {
2884 /* create a local sequential matrix matseq[0] */
2885 mloc_sub = PETSC_DECIDE;
2886 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
= PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2887 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2888 rstart = rend - mloc_sub; 2889 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2890 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2891 } else { /* reuse == MAT_REUSE_MATRIX */ 2892 if (subsize == 1) { 2893 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2894 redund = c->redundant; 2895 } else { 2896 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2897 redund = c->redundant; 2898 } 2899 2900 isrow = redund->isrow; 2901 iscol = redund->iscol; 2902 matseq = redund->matseq; 2903 } 2904 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2905 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2906 2907 if (reuse == MAT_INITIAL_MATRIX) { 2908 /* create a supporting struct and attach it to *matredundant for reuse */ 2909 ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr); 2910 if (subsize == 1) { 2911 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2912 c->redundant = redund; 2913 } else { 2914 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2915 c->redundant = redund; 2916 } 2917 redund->isrow = isrow; 2918 redund->iscol = iscol; 2919 redund->matseq = matseq; 2920 redund->psubcomm = psubcomm; 2921 redund->Destroy = (*matredundant)->ops->destroy; 2922 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2923 } 2924 PetscFunctionReturn(0); 2925 } 2926 2927 #undef __FUNCT__ 2928 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2929 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2930 { 2931 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2932 PetscErrorCode ierr; 2933 PetscInt i,*idxb = 0; 2934 PetscScalar *va,*vb; 2935 Vec vtmp; 2936 2937 PetscFunctionBegin; 2938 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2939 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2940 if (idx) { 2941 for (i=0; i<A->rmap->n; i++) { 2942 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2943 } 2944 } 2945 2946 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2947 if (idx) { 2948 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2949 } 2950 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2951 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2952 2953 for (i=0; i<A->rmap->n; i++) { 2954 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2955 va[i] = vb[i]; 2956 if (idx) idx[i] = a->garray[idxb[i]]; 2957 } 2958 } 2959 2960 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2961 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2962 ierr = PetscFree(idxb);CHKERRQ(ierr); 2963 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2964 PetscFunctionReturn(0); 2965 } 2966 2967 #undef __FUNCT__ 2968 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2969 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2970 { 2971 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2972 PetscErrorCode ierr; 2973 PetscInt i,*idxb = 0; 2974 PetscScalar *va,*vb; 2975 Vec vtmp; 2976 2977 PetscFunctionBegin; 2978 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2979 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2980 if (idx) { 2981 for (i=0; i<A->rmap->n; i++) { 2982 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2983 } 2984 } 2985 2986 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2987 if (idx) { 2988 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2989 } 2990 ierr =
MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2991 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2992 2993 for (i=0; i<A->rmap->n; i++) { 2994 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2995 va[i] = vb[i]; 2996 if (idx) idx[i] = a->garray[idxb[i]]; 2997 } 2998 } 2999 3000 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3001 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3002 ierr = PetscFree(idxb);CHKERRQ(ierr); 3003 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3004 PetscFunctionReturn(0); 3005 } 3006 3007 #undef __FUNCT__ 3008 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 3009 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3010 { 3011 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3012 PetscInt n = A->rmap->n; 3013 PetscInt cstart = A->cmap->rstart; 3014 PetscInt *cmap = mat->garray; 3015 PetscInt *diagIdx, *offdiagIdx; 3016 Vec diagV, offdiagV; 3017 PetscScalar *a, *diagA, *offdiagA; 3018 PetscInt r; 3019 PetscErrorCode ierr; 3020 3021 PetscFunctionBegin; 3022 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 3023 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3024 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3025 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3026 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3027 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3028 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3029 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3030 for (r = 0; r < n; ++r) { 3031 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3032 a[r] = diagA[r]; 3033 idx[r] = cstart + diagIdx[r]; 3034 } else { 3035 a[r] = offdiagA[r]; 3036 idx[r] = cmap[offdiagIdx[r]]; 3037 } 3038 } 3039 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3040 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3041 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3042 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3043 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3044 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3045 PetscFunctionReturn(0); 3046 } 3047 3048 #undef __FUNCT__ 3049 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3050 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3051 { 3052 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3053 PetscInt n = A->rmap->n; 3054 PetscInt cstart = A->cmap->rstart; 3055 PetscInt *cmap = mat->garray; 3056 PetscInt *diagIdx, *offdiagIdx; 3057 Vec diagV, offdiagV; 3058 PetscScalar *a, *diagA, *offdiagA; 3059 PetscInt r; 3060 PetscErrorCode ierr; 3061 3062 PetscFunctionBegin; 3063 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 3064 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3065 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3066 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3067 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3068 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3069 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3070 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3071 for (r = 0; r < n; ++r) { 3072 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3073 a[r] = diagA[r]; 3074 idx[r] = cstart + diagIdx[r]; 3075 } else { 3076 a[r] = offdiagA[r]; 3077 idx[r] = cmap[offdiagIdx[r]]; 3078 } 3079 } 3080 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3081 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3082 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3083 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3084 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3085 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3086 PetscFunctionReturn(0); 3087 } 3088 3089 #undef __FUNCT__ 3090 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3091 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3092 { 3093 PetscErrorCode ierr; 3094 Mat *dummy; 3095 3096 PetscFunctionBegin; 3097 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3098 *newmat = *dummy; 3099 ierr = PetscFree(dummy);CHKERRQ(ierr); 3100 PetscFunctionReturn(0); 3101 } 3102 3103 #undef __FUNCT__ 3104 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3105 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3106 { 3107 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3108 PetscErrorCode ierr; 3109 3110 PetscFunctionBegin; 3111 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3112 PetscFunctionReturn(0); 3113 } 3114 3115 #undef __FUNCT__ 3116 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3117 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3118 { 3119 PetscErrorCode ierr; 3120 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3121 3122 PetscFunctionBegin; 3123 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3124 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3125 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3126 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3127 PetscFunctionReturn(0); 3128 } 3129 3130 /* -------------------------------------------------------------------*/ 3131 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3132 MatGetRow_MPIAIJ, 3133 MatRestoreRow_MPIAIJ, 3134 MatMult_MPIAIJ, 3135 /* 4*/ MatMultAdd_MPIAIJ, 3136 MatMultTranspose_MPIAIJ, 3137 MatMultTransposeAdd_MPIAIJ, 3138 #if defined(PETSC_HAVE_PBGL) 3139 MatSolve_MPIAIJ, 3140 #else 3141 0, 3142 #endif 3143 0, 3144 0, 3145 /*10*/ 0, 3146 0, 3147 0, 3148 MatSOR_MPIAIJ, 3149 MatTranspose_MPIAIJ, 3150 /*15*/ MatGetInfo_MPIAIJ, 3151 MatEqual_MPIAIJ, 3152 MatGetDiagonal_MPIAIJ, 3153 MatDiagonalScale_MPIAIJ, 3154 MatNorm_MPIAIJ, 3155 /*20*/ MatAssemblyBegin_MPIAIJ, 3156 MatAssemblyEnd_MPIAIJ, 3157 MatSetOption_MPIAIJ, 3158 MatZeroEntries_MPIAIJ, 3159 /*24*/ MatZeroRows_MPIAIJ, 3160 0, 3161 #if defined(PETSC_HAVE_PBGL) 3162 0, 3163 #else 3164 0, 3165 #endif 3166 0, 3167 0, 3168 /*29*/ MatSetUp_MPIAIJ, 3169 #if defined(PETSC_HAVE_PBGL) 3170 0, 3171 #else 3172 0, 3173 #endif 3174 0, 3175 0, 3176 0, 3177 /*34*/ MatDuplicate_MPIAIJ, 3178 0, 3179 0, 3180 0, 3181 0, 3182 /*39*/ MatAXPY_MPIAIJ, 3183 MatGetSubMatrices_MPIAIJ, 3184 MatIncreaseOverlap_MPIAIJ, 3185 MatGetValues_MPIAIJ, 3186 MatCopy_MPIAIJ, 3187 /*44*/ MatGetRowMax_MPIAIJ, 3188 MatScale_MPIAIJ, 3189 0, 3190 0, 3191 MatZeroRowsColumns_MPIAIJ, 3192 /*49*/ MatSetRandom_MPIAIJ, 3193 0, 3194 0, 3195 0, 3196 0, 3197 /*54*/ MatFDColoringCreate_MPIXAIJ, 3198 0, 3199 MatSetUnfactored_MPIAIJ, 3200 MatPermute_MPIAIJ, 3201 0, 3202 /*59*/ MatGetSubMatrix_MPIAIJ, 3203 MatDestroy_MPIAIJ, 3204 MatView_MPIAIJ, 3205 0, 3206 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3207 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3208 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3209 0, 3210 0, 3211 0, 3212 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3213 MatGetRowMinAbs_MPIAIJ, 3214 0, 3215 MatSetColoring_MPIAIJ, 3216 0, 3217 MatSetValuesAdifor_MPIAIJ, 3218 /*75*/ MatFDColoringApply_AIJ, 3219 0, 3220 0, 3221 0, 3222 MatFindZeroDiagonals_MPIAIJ, 3223 /*80*/ 0, 3224 0, 3225 0, 3226 /*83*/ MatLoad_MPIAIJ, 3227 0, 3228 0, 3229 0, 3230 0, 
3231 0, 3232 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3233 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3234 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3235 MatPtAP_MPIAIJ_MPIAIJ, 3236 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3237 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3238 0, 3239 0, 3240 0, 3241 0, 3242 /*99*/ 0, 3243 0, 3244 0, 3245 MatConjugate_MPIAIJ, 3246 0, 3247 /*104*/MatSetValuesRow_MPIAIJ, 3248 MatRealPart_MPIAIJ, 3249 MatImaginaryPart_MPIAIJ, 3250 0, 3251 0, 3252 /*109*/0, 3253 MatGetRedundantMatrix_MPIAIJ, 3254 MatGetRowMin_MPIAIJ, 3255 0, 3256 0, 3257 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3258 0, 3259 0, 3260 0, 3261 0, 3262 /*119*/0, 3263 0, 3264 0, 3265 0, 3266 MatGetMultiProcBlock_MPIAIJ, 3267 /*124*/MatFindNonzeroRows_MPIAIJ, 3268 MatGetColumnNorms_MPIAIJ, 3269 MatInvertBlockDiagonal_MPIAIJ, 3270 0, 3271 MatGetSubMatricesParallel_MPIAIJ, 3272 /*129*/0, 3273 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3274 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3275 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3276 0, 3277 /*134*/0, 3278 0, 3279 0, 3280 0, 3281 0, 3282 /*139*/0, 3283 0, 3284 0, 3285 MatFDColoringSetUp_MPIXAIJ 3286 }; 3287 3288 /* ----------------------------------------------------------------------------------------*/ 3289 3290 #undef __FUNCT__ 3291 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3292 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3293 { 3294 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3295 PetscErrorCode ierr; 3296 3297 PetscFunctionBegin; 3298 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3299 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3300 PetscFunctionReturn(0); 3301 } 3302 3303 #undef __FUNCT__ 3304 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3305 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3306 { 3307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3308 PetscErrorCode ierr; 3309 3310 PetscFunctionBegin; 3311 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3312 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3313 PetscFunctionReturn(0); 3314 } 3315 3316 #undef __FUNCT__ 3317 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3318 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3319 { 3320 Mat_MPIAIJ *b; 3321 PetscErrorCode ierr; 3322 3323 PetscFunctionBegin; 3324 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3325 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3326 b = (Mat_MPIAIJ*)B->data; 3327 3328 if (!B->preallocated) { 3329 /* Explicitly create 2 MATSEQAIJ matrices. 
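       b->A holds the diagonal block (local rows by local columns) and b->B holds the
       off-diagonal entries; b->B starts out with all N global columns and is compacted
       to just the columns it actually needs (recorded in garray) during assembly.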
*/ 3330 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3331 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3332 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3333 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3334 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3335 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3336 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3337 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3338 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3339 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3340 } 3341 3342 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3343 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3344 B->preallocated = PETSC_TRUE; 3345 PetscFunctionReturn(0); 3346 } 3347 3348 #undef __FUNCT__ 3349 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3350 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3351 { 3352 Mat mat; 3353 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3354 PetscErrorCode ierr; 3355 3356 PetscFunctionBegin; 3357 *newmat = 0; 3358 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3359 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3360 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3361 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3362 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3363 a = (Mat_MPIAIJ*)mat->data; 3364 3365 mat->factortype = matin->factortype; 3366 mat->rmap->bs = matin->rmap->bs; 3367 mat->cmap->bs = matin->cmap->bs; 3368 mat->assembled = PETSC_TRUE; 3369 mat->insertmode = NOT_SET_VALUES; 3370 mat->preallocated = PETSC_TRUE; 3371 3372 a->size = oldmat->size; 3373 a->rank = oldmat->rank; 3374 a->donotstash = oldmat->donotstash; 3375 a->roworiented = oldmat->roworiented; 3376 a->rowindices = 0; 3377 a->rowvalues = 0; 3378 a->getrowactive = PETSC_FALSE; 3379 3380 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3381 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3382 3383 if (oldmat->colmap) { 3384 #if defined(PETSC_USE_CTABLE) 3385 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3386 #else 3387 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3388 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3389 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3390 #endif 3391 } else a->colmap = 0; 3392 if (oldmat->garray) { 3393 PetscInt len; 3394 len = oldmat->B->cmap->n; 3395 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3396 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3397 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3398 } else a->garray = 0; 3399 3400 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3401 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3402 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3403 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3404 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3405 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3406 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3407 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3408 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3409 *newmat = mat; 3410 PetscFunctionReturn(0); 3411 } 3412 3413 3414 3415 #undef __FUNCT__ 3416 #define __FUNCT__ "MatLoad_MPIAIJ" 3417 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3418 { 3419 PetscScalar *vals,*svals; 3420 MPI_Comm comm; 3421 PetscErrorCode ierr; 3422 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3423 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3424 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3425 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3426 PetscInt cend,cstart,n,*rowners,sizesset=1; 3427 int fd; 3428 PetscInt bs = 1; 3429 3430 PetscFunctionBegin; 3431 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3432 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3433 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3434 if (!rank) { 3435 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3436 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3437 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3438 } 3439 3440 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3441 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3442 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3443 3444 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3445 3446 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3447 M = header[1]; N = header[2]; 3448 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3449 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3450 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3451 3452 /* If global sizes are set, check if they are consistent with that given in the file */ 3453 if (sizesset) { 3454 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3455 } 3456 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3457 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3458 3459 /* determine ownership of all (block) rows */ 3460 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3461 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3462 else m = newMat->rmap->n; /* Set by user */ 3463 3464 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3465 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3466 3467 /* First process needs enough room for process with most rows */ 3468 if (!rank) { 3469 mmax = rowners[1]; 3470 for (i=2; i<=size; i++) { 3471 mmax = PetscMax(mmax, rowners[i]); 3472 } 3473 } else mmax = -1; /* unused, but compilers complain */ 3474 3475 rowners[0] = 0; 3476 for (i=2; i<=size; i++) { 3477 rowners[i] += 
rowners[i-1]; 3478 } 3479 rstart = rowners[rank]; 3480 rend = rowners[rank+1]; 3481 3482 /* distribute row lengths to all processors */ 3483 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3484 if (!rank) { 3485 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3486 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3487 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3488 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3489 for (j=0; j<m; j++) { 3490 procsnz[0] += ourlens[j]; 3491 } 3492 for (i=1; i<size; i++) { 3493 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3494 /* calculate the number of nonzeros on each processor */ 3495 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3496 procsnz[i] += rowlengths[j]; 3497 } 3498 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3499 } 3500 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3501 } else { 3502 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3503 } 3504 3505 if (!rank) { 3506 /* determine max buffer needed and allocate it */ 3507 maxnz = 0; 3508 for (i=0; i<size; i++) { 3509 maxnz = PetscMax(maxnz,procsnz[i]); 3510 } 3511 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3512 3513 /* read in my part of the matrix column indices */ 3514 nz = procsnz[0]; 3515 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3516 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3517 3518 /* read in every one elses and ship off */ 3519 for (i=1; i<size; i++) { 3520 nz = procsnz[i]; 3521 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3522 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3523 } 3524 ierr = PetscFree(cols);CHKERRQ(ierr); 3525 } else { 3526 /* determine buffer space needed for message */ 3527 nz = 0; 3528 for (i=0; i<m; i++) { 3529 nz += ourlens[i]; 3530 } 3531 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3532 3533 /* receive message of column indices*/ 3534 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3535 } 3536 3537 /* determine column ownership if matrix is not square */ 3538 if (N != M) { 3539 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3540 else n = newMat->cmap->n; 3541 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3542 cstart = cend - n; 3543 } else { 3544 cstart = rstart; 3545 cend = rend; 3546 n = cend - cstart; 3547 } 3548 3549 /* loop over local rows, determining number of off diagonal entries */ 3550 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3551 jj = 0; 3552 for (i=0; i<m; i++) { 3553 for (j=0; j<ourlens[i]; j++) { 3554 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3555 jj++; 3556 } 3557 } 3558 3559 for (i=0; i<m; i++) { 3560 ourlens[i] -= offlens[i]; 3561 } 3562 if (!sizesset) { 3563 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3564 } 3565 3566 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3567 3568 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3569 3570 for (i=0; i<m; i++) { 3571 ourlens[i] += offlens[i]; 3572 } 3573 3574 if (!rank) { 3575 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3576 3577 /* read in my part of the matrix numerical values */ 3578 nz = procsnz[0]; 3579 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3580 3581 /* insert into matrix */ 3582 jj = rstart; 3583 smycols = mycols; 
3584 svals = vals; 3585 for (i=0; i<m; i++) { 3586 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3587 smycols += ourlens[i]; 3588 svals += ourlens[i]; 3589 jj++; 3590 } 3591 3592 /* read in other processors and ship out */ 3593 for (i=1; i<size; i++) { 3594 nz = procsnz[i]; 3595 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3596 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3597 } 3598 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3599 } else { 3600 /* receive numeric values */ 3601 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3602 3603 /* receive message of values*/ 3604 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3605 3606 /* insert into matrix */ 3607 jj = rstart; 3608 smycols = mycols; 3609 svals = vals; 3610 for (i=0; i<m; i++) { 3611 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3612 smycols += ourlens[i]; 3613 svals += ourlens[i]; 3614 jj++; 3615 } 3616 } 3617 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3618 ierr = PetscFree(vals);CHKERRQ(ierr); 3619 ierr = PetscFree(mycols);CHKERRQ(ierr); 3620 ierr = PetscFree(rowners);CHKERRQ(ierr); 3621 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3622 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3623 PetscFunctionReturn(0); 3624 } 3625 3626 #undef __FUNCT__ 3627 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3628 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3629 { 3630 PetscErrorCode ierr; 3631 IS iscol_local; 3632 PetscInt csize; 3633 3634 PetscFunctionBegin; 3635 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3636 if (call == MAT_REUSE_MATRIX) { 3637 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3638 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3639 } else { 3640 PetscInt cbs; 3641 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3642 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3643 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3644 } 3645 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3646 if (call == MAT_INITIAL_MATRIX) { 3647 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3648 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3649 } 3650 PetscFunctionReturn(0); 3651 } 3652 3653 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3654 #undef __FUNCT__ 3655 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3656 /* 3657 Not great since it makes two copies of the submatrix, first an SeqAIJ 3658 in local and then by concatenating the local matrices the end result. 3659 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3660 3661 Note: This requires a sequential iscol with all indices. 
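
      A caller-side sketch (illustrative only, error checking omitted; isrow, iscol_local
      and sub are placeholder names):

         IS  iscol_local;
         Mat sub;
         ISAllGather(iscol,&iscol_local);
         MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);

      MatGetSubMatrix_MPIAIJ() above does essentially this, except that it passes the
      local size of iscol as csize instead of PETSC_DECIDE.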
3662 */ 3663 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3664 { 3665 PetscErrorCode ierr; 3666 PetscMPIInt rank,size; 3667 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3668 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3669 PetscBool allcolumns, colflag; 3670 Mat M,Mreuse; 3671 MatScalar *vwork,*aa; 3672 MPI_Comm comm; 3673 Mat_SeqAIJ *aij; 3674 3675 PetscFunctionBegin; 3676 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3677 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3678 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3679 3680 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3681 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3682 if (colflag && ncol == mat->cmap->N) { 3683 allcolumns = PETSC_TRUE; 3684 } else { 3685 allcolumns = PETSC_FALSE; 3686 } 3687 if (call == MAT_REUSE_MATRIX) { 3688 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3689 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3690 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3691 } else { 3692 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3693 } 3694 3695 /* 3696 m - number of local rows 3697 n - number of columns (same on all processors) 3698 rstart - first row in new global matrix generated 3699 */ 3700 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3701 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3702 if (call == MAT_INITIAL_MATRIX) { 3703 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3704 ii = aij->i; 3705 jj = aij->j; 3706 3707 /* 3708 Determine the number of non-zeros in the diagonal and off-diagonal 3709 portions of the matrix in order to do correct preallocation 3710 */ 3711 3712 /* first get start and end of "diagonal" columns */ 3713 if (csize == PETSC_DECIDE) { 3714 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3715 if (mglobal == n) { /* square matrix */ 3716 nlocal = m; 3717 } else { 3718 nlocal = n/size + ((n % size) > rank); 3719 } 3720 } else { 3721 nlocal = csize; 3722 } 3723 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3724 rstart = rend - nlocal; 3725 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3726 3727 /* next, compute all the lengths */ 3728 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3729 olens = dlens + m; 3730 for (i=0; i<m; i++) { 3731 jend = ii[i+1] - ii[i]; 3732 olen = 0; 3733 dlen = 0; 3734 for (j=0; j<jend; j++) { 3735 if (*jj < rstart || *jj >= rend) olen++; 3736 else dlen++; 3737 jj++; 3738 } 3739 olens[i] = olen; 3740 dlens[i] = dlen; 3741 } 3742 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3743 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3744 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3745 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3746 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3747 ierr = PetscFree(dlens);CHKERRQ(ierr); 3748 } else { 3749 PetscInt ml,nl; 3750 3751 M = *newmat; 3752 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3753 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3754 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3755 /* 
3756 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3757 rather than the slower MatSetValues(). 3758 */ 3759 M->was_assembled = PETSC_TRUE; 3760 M->assembled = PETSC_FALSE; 3761 } 3762 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3763 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3764 ii = aij->i; 3765 jj = aij->j; 3766 aa = aij->a; 3767 for (i=0; i<m; i++) { 3768 row = rstart + i; 3769 nz = ii[i+1] - ii[i]; 3770 cwork = jj; jj += nz; 3771 vwork = aa; aa += nz; 3772 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3773 } 3774 3775 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3776 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3777 *newmat = M; 3778 3779 /* save submatrix used in processor for next request */ 3780 if (call == MAT_INITIAL_MATRIX) { 3781 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3782 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3783 } 3784 PetscFunctionReturn(0); 3785 } 3786 3787 #undef __FUNCT__ 3788 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3789 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3790 { 3791 PetscInt m,cstart, cend,j,nnz,i,d; 3792 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3793 const PetscInt *JJ; 3794 PetscScalar *values; 3795 PetscErrorCode ierr; 3796 3797 PetscFunctionBegin; 3798 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3799 3800 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3801 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3802 m = B->rmap->n; 3803 cstart = B->cmap->rstart; 3804 cend = B->cmap->rend; 3805 rstart = B->rmap->rstart; 3806 3807 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3808 3809 #if defined(PETSC_USE_DEBUG) 3810 for (i=0; i<m; i++) { 3811 nnz = Ii[i+1]- Ii[i]; 3812 JJ = J + Ii[i]; 3813 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3814 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3815 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3816 } 3817 #endif 3818 3819 for (i=0; i<m; i++) { 3820 nnz = Ii[i+1]- Ii[i]; 3821 JJ = J + Ii[i]; 3822 nnz_max = PetscMax(nnz_max,nnz); 3823 d = 0; 3824 for (j=0; j<nnz; j++) { 3825 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3826 } 3827 d_nnz[i] = d; 3828 o_nnz[i] = nnz - d; 3829 } 3830 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3831 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3832 3833 if (v) values = (PetscScalar*)v; 3834 else { 3835 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3836 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3837 } 3838 3839 for (i=0; i<m; i++) { 3840 ii = i + rstart; 3841 nnz = Ii[i+1]- Ii[i]; 3842 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3843 } 3844 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3845 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3846 3847 if (!v) { 3848 ierr = PetscFree(values);CHKERRQ(ierr); 3849 } 3850 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3851 PetscFunctionReturn(0); 3852 } 3853 3854 #undef __FUNCT__ 3855 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3856 /*@ 3857 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3858 (the default parallel PETSc format). 3859 3860 Collective on MPI_Comm 3861 3862 Input Parameters: 3863 + B - the matrix 3864 . i - the indices into j for the start of each local row (starts with zero) 3865 . j - the column indices for each local row (starts with zero) 3866 - v - optional values in the matrix 3867 3868 Level: developer 3869 3870 Notes: 3871 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3872 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3873 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3874 3875 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3876 3877 The format which is used for the sparse matrix input, is equivalent to a 3878 row-major ordering.. i.e for the following matrix, the input data expected is 3879 as shown: 3880 3881 1 0 0 3882 2 0 3 P0 3883 ------- 3884 4 5 6 P1 3885 3886 Process0 [P0]: rows_owned=[0,1] 3887 i = {0,1,3} [size = nrow+1 = 2+1] 3888 j = {0,0,2} [size = nz = 6] 3889 v = {1,2,3} [size = nz = 6] 3890 3891 Process1 [P1]: rows_owned=[2] 3892 i = {0,3} [size = nrow+1 = 1+1] 3893 j = {0,1,2} [size = nz = 6] 3894 v = {4,5,6} [size = nz = 6] 3895 3896 .keywords: matrix, aij, compressed row, sparse, parallel 3897 3898 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3899 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3900 @*/ 3901 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3902 { 3903 PetscErrorCode ierr; 3904 3905 PetscFunctionBegin; 3906 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3907 PetscFunctionReturn(0); 3908 } 3909 3910 #undef __FUNCT__ 3911 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3912 /*@C 3913 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3914 (the default parallel PETSc format). For good matrix assembly performance 3915 the user should preallocate the matrix storage by setting the parameters 3916 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3917 performance can be increased by more than a factor of 50. 3918 3919 Collective on MPI_Comm 3920 3921 Input Parameters: 3922 + A - the matrix 3923 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3924 (same value is used for all local rows) 3925 . d_nnz - array containing the number of nonzeros in the various rows of the 3926 DIAGONAL portion of the local submatrix (possibly different for each row) 3927 or NULL, if d_nz is used to specify the nonzero structure. 3928 The size of this array is equal to the number of local rows, i.e 'm'. 
3929 For matrices that will be factored, you must leave room for (and set) 3930 the diagonal entry even if it is zero. 3931 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3932 submatrix (same value is used for all local rows). 3933 - o_nnz - array containing the number of nonzeros in the various rows of the 3934 OFF-DIAGONAL portion of the local submatrix (possibly different for 3935 each row) or NULL, if o_nz is used to specify the nonzero 3936 structure. The size of this array is equal to the number 3937 of local rows, i.e 'm'. 3938 3939 If the *_nnz parameter is given then the *_nz parameter is ignored 3940 3941 The AIJ format (also called the Yale sparse matrix format or 3942 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3943 storage. The stored row and column indices begin with zero. 3944 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3945 3946 The parallel matrix is partitioned such that the first m0 rows belong to 3947 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3948 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3949 3950 The DIAGONAL portion of the local submatrix of a processor can be defined 3951 as the submatrix which is obtained by extraction the part corresponding to 3952 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3953 first row that belongs to the processor, r2 is the last row belonging to 3954 the this processor, and c1-c2 is range of indices of the local part of a 3955 vector suitable for applying the matrix to. This is an mxn matrix. In the 3956 common case of a square matrix, the row and column ranges are the same and 3957 the DIAGONAL part is also square. The remaining portion of the local 3958 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3959 3960 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3961 3962 You can call MatGetInfo() to get information on how effective the preallocation was; 3963 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3964 You can also run with the option -info and look for messages with the string 3965 malloc in them to see if additional memory allocation was needed. 3966 3967 Example usage: 3968 3969 Consider the following 8x8 matrix with 34 non-zero values, that is 3970 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3971 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3972 as follows: 3973 3974 .vb 3975 1 2 0 | 0 3 0 | 0 4 3976 Proc0 0 5 6 | 7 0 0 | 8 0 3977 9 0 10 | 11 0 0 | 12 0 3978 ------------------------------------- 3979 13 0 14 | 15 16 17 | 0 0 3980 Proc1 0 18 0 | 19 20 21 | 0 0 3981 0 0 0 | 22 23 0 | 24 0 3982 ------------------------------------- 3983 Proc2 25 26 27 | 0 0 28 | 29 0 3984 30 0 0 | 31 32 33 | 0 34 3985 .ve 3986 3987 This can be represented as a collection of submatrices as: 3988 3989 .vb 3990 A B C 3991 D E F 3992 G H I 3993 .ve 3994 3995 Where the submatrices A,B,C are owned by proc0, D,E,F are 3996 owned by proc1, G,H,I are owned by proc2. 3997 3998 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3999 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4000 The 'M','N' parameters are 8,8, and have the same values on all procs. 4001 4002 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4003 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 4004 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4005 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4006 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4007 matrix, ans [DF] as another SeqAIJ matrix. 4008 4009 When d_nz, o_nz parameters are specified, d_nz storage elements are 4010 allocated for every row of the local diagonal submatrix, and o_nz 4011 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4012 One way to choose d_nz and o_nz is to use the max nonzerors per local 4013 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4014 In this case, the values of d_nz,o_nz are: 4015 .vb 4016 proc0 : dnz = 2, o_nz = 2 4017 proc1 : dnz = 3, o_nz = 2 4018 proc2 : dnz = 1, o_nz = 4 4019 .ve 4020 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4021 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4022 for proc3. i.e we are using 12+15+10=37 storage locations to store 4023 34 values. 4024 4025 When d_nnz, o_nnz parameters are specified, the storage is specified 4026 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4027 In the above case the values for d_nnz,o_nnz are: 4028 .vb 4029 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4030 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4031 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4032 .ve 4033 Here the space allocated is sum of all the above values i.e 34, and 4034 hence pre-allocation is perfect. 4035 4036 Level: intermediate 4037 4038 .keywords: matrix, aij, compressed row, sparse, parallel 4039 4040 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4041 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4042 @*/ 4043 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4044 { 4045 PetscErrorCode ierr; 4046 4047 PetscFunctionBegin; 4048 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4049 PetscValidType(B,1); 4050 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4051 PetscFunctionReturn(0); 4052 } 4053 4054 #undef __FUNCT__ 4055 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4056 /*@ 4057 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4058 CSR format the local rows. 4059 4060 Collective on MPI_Comm 4061 4062 Input Parameters: 4063 + comm - MPI communicator 4064 . m - number of local rows (Cannot be PETSC_DECIDE) 4065 . n - This value should be the same as the local size used in creating the 4066 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4067 calculated if N is given) For square matrices n is almost always m. 4068 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4069 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4070 . i - row indices 4071 . j - column indices 4072 - a - matrix values 4073 4074 Output Parameter: 4075 . mat - the matrix 4076 4077 Level: intermediate 4078 4079 Notes: 4080 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4081 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4082 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
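
       As a sketch (error checking omitted), process 0 of the two-process example below
       could build the 3x3 matrix with

.vb
        Mat         A;
        PetscInt    i[] = {0,1,3},j[] = {0,0,2};
        PetscScalar v[] = {1,2,3};
        MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
   while process 1 passes its own i/j/v arrays as listed.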
4083 4084 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4085 4086 The format which is used for the sparse matrix input, is equivalent to a 4087 row-major ordering.. i.e for the following matrix, the input data expected is 4088 as shown: 4089 4090 1 0 0 4091 2 0 3 P0 4092 ------- 4093 4 5 6 P1 4094 4095 Process0 [P0]: rows_owned=[0,1] 4096 i = {0,1,3} [size = nrow+1 = 2+1] 4097 j = {0,0,2} [size = nz = 6] 4098 v = {1,2,3} [size = nz = 6] 4099 4100 Process1 [P1]: rows_owned=[2] 4101 i = {0,3} [size = nrow+1 = 1+1] 4102 j = {0,1,2} [size = nz = 6] 4103 v = {4,5,6} [size = nz = 6] 4104 4105 .keywords: matrix, aij, compressed row, sparse, parallel 4106 4107 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4108 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4109 @*/ 4110 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4111 { 4112 PetscErrorCode ierr; 4113 4114 PetscFunctionBegin; 4115 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4116 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4117 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4118 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4119 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4120 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4121 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4122 PetscFunctionReturn(0); 4123 } 4124 4125 #undef __FUNCT__ 4126 #define __FUNCT__ "MatCreateAIJ" 4127 /*@C 4128 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4129 (the default parallel PETSc format). For good matrix assembly performance 4130 the user should preallocate the matrix storage by setting the parameters 4131 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4132 performance can be increased by more than a factor of 50. 4133 4134 Collective on MPI_Comm 4135 4136 Input Parameters: 4137 + comm - MPI communicator 4138 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4139 This value should be the same as the local size used in creating the 4140 y vector for the matrix-vector product y = Ax. 4141 . n - This value should be the same as the local size used in creating the 4142 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4143 calculated if N is given) For square matrices n is almost always m. 4144 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4145 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4146 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4147 (same value is used for all local rows) 4148 . d_nnz - array containing the number of nonzeros in the various rows of the 4149 DIAGONAL portion of the local submatrix (possibly different for each row) 4150 or NULL, if d_nz is used to specify the nonzero structure. 4151 The size of this array is equal to the number of local rows, i.e 'm'. 4152 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4153 submatrix (same value is used for all local rows). 
4154 - o_nnz - array containing the number of nonzeros in the various rows of the 4155 OFF-DIAGONAL portion of the local submatrix (possibly different for 4156 each row) or NULL, if o_nz is used to specify the nonzero 4157 structure. The size of this array is equal to the number 4158 of local rows, i.e 'm'. 4159 4160 Output Parameter: 4161 . A - the matrix 4162 4163 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4164 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4165 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4166 4167 Notes: 4168 If the *_nnz parameter is given then the *_nz parameter is ignored 4169 4170 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4171 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4172 storage requirements for this matrix. 4173 4174 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4175 processor than it must be used on all processors that share the object for 4176 that argument. 4177 4178 The user MUST specify either the local or global matrix dimensions 4179 (possibly both). 4180 4181 The parallel matrix is partitioned across processors such that the 4182 first m0 rows belong to process 0, the next m1 rows belong to 4183 process 1, the next m2 rows belong to process 2 etc.. where 4184 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4185 values corresponding to [m x N] submatrix. 4186 4187 The columns are logically partitioned with the n0 columns belonging 4188 to 0th partition, the next n1 columns belonging to the next 4189 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4190 4191 The DIAGONAL portion of the local submatrix on any given processor 4192 is the submatrix corresponding to the rows and columns m,n 4193 corresponding to the given processor. i.e diagonal matrix on 4194 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4195 etc. The remaining portion of the local submatrix [m x (N-n)] 4196 constitute the OFF-DIAGONAL portion. The example below better 4197 illustrates this concept. 4198 4199 For a square global matrix we define each processor's diagonal portion 4200 to be its local rows and the corresponding columns (a square submatrix); 4201 each processor's off-diagonal portion encompasses the remainder of the 4202 local matrix (a rectangular submatrix). 4203 4204 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4205 4206 When calling this routine with a single process communicator, a matrix of 4207 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4208 type of communicator, use the construction mechanism: 4209 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4210 4211 By default, this format uses inodes (identical nodes) when possible. 4212 We search for consecutive rows with the same nonzero structure, thereby 4213 reusing matrix information to achieve increased efficiency. 4214 4215 Options Database Keys: 4216 + -mat_no_inode - Do not use inodes 4217 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4218 - -mat_aij_oneindex - Internally use indexing starting at 1 4219 rather than 0. Note that when calling MatSetValues(), 4220 the user still MUST index entries starting at 0! 4221 4222 4223 Example usage: 4224 4225 Consider the following 8x8 matrix with 34 non-zero values, that is 4226 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4227 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4228 as follows: 4229 4230 .vb 4231 1 2 0 | 0 3 0 | 0 4 4232 Proc0 0 5 6 | 7 0 0 | 8 0 4233 9 0 10 | 11 0 0 | 12 0 4234 ------------------------------------- 4235 13 0 14 | 15 16 17 | 0 0 4236 Proc1 0 18 0 | 19 20 21 | 0 0 4237 0 0 0 | 22 23 0 | 24 0 4238 ------------------------------------- 4239 Proc2 25 26 27 | 0 0 28 | 29 0 4240 30 0 0 | 31 32 33 | 0 34 4241 .ve 4242 4243 This can be represented as a collection of submatrices as: 4244 4245 .vb 4246 A B C 4247 D E F 4248 G H I 4249 .ve 4250 4251 Where the submatrices A,B,C are owned by proc0, D,E,F are 4252 owned by proc1, G,H,I are owned by proc2. 4253 4254 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4255 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4256 The 'M','N' parameters are 8,8, and have the same values on all procs. 4257 4258 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4259 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4260 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4261 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4262 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4263 matrix, ans [DF] as another SeqAIJ matrix. 4264 4265 When d_nz, o_nz parameters are specified, d_nz storage elements are 4266 allocated for every row of the local diagonal submatrix, and o_nz 4267 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4268 One way to choose d_nz and o_nz is to use the max nonzerors per local 4269 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4270 In this case, the values of d_nz,o_nz are: 4271 .vb 4272 proc0 : dnz = 2, o_nz = 2 4273 proc1 : dnz = 3, o_nz = 2 4274 proc2 : dnz = 1, o_nz = 4 4275 .ve 4276 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4277 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4278 for proc3. i.e we are using 12+15+10=37 storage locations to store 4279 34 values. 4280 4281 When d_nnz, o_nnz parameters are specified, the storage is specified 4282 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4283 In the above case the values for d_nnz,o_nnz are: 4284 .vb 4285 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4286 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4287 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4288 .ve 4289 Here the space allocated is sum of all the above values i.e 34, and 4290 hence pre-allocation is perfect. 
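
       Putting the example together, a sketch of the call on proc0 (error checking
       omitted; proc1 and proc2 pass their own local sizes and nnz arrays):

.vb
        Mat      A;
        PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
        MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve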
4291 4292 Level: intermediate 4293 4294 .keywords: matrix, aij, compressed row, sparse, parallel 4295 4296 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4297 MPIAIJ, MatCreateMPIAIJWithArrays() 4298 @*/ 4299 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4300 { 4301 PetscErrorCode ierr; 4302 PetscMPIInt size; 4303 4304 PetscFunctionBegin; 4305 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4306 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4307 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4308 if (size > 1) { 4309 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4310 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4311 } else { 4312 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4313 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4314 } 4315 PetscFunctionReturn(0); 4316 } 4317 4318 #undef __FUNCT__ 4319 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4320 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4321 { 4322 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4323 4324 PetscFunctionBegin; 4325 *Ad = a->A; 4326 *Ao = a->B; 4327 *colmap = a->garray; 4328 PetscFunctionReturn(0); 4329 } 4330 4331 #undef __FUNCT__ 4332 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4333 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4334 { 4335 PetscErrorCode ierr; 4336 PetscInt i; 4337 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4338 4339 PetscFunctionBegin; 4340 if (coloring->ctype == IS_COLORING_GLOBAL) { 4341 ISColoringValue *allcolors,*colors; 4342 ISColoring ocoloring; 4343 4344 /* set coloring for diagonal portion */ 4345 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4346 4347 /* set coloring for off-diagonal portion */ 4348 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4349 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4350 for (i=0; i<a->B->cmap->n; i++) { 4351 colors[i] = allcolors[a->garray[i]]; 4352 } 4353 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4354 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4355 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4356 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4357 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4358 ISColoringValue *colors; 4359 PetscInt *larray; 4360 ISColoring ocoloring; 4361 4362 /* set coloring for diagonal portion */ 4363 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4364 for (i=0; i<a->A->cmap->n; i++) { 4365 larray[i] = i + A->cmap->rstart; 4366 } 4367 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4368 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4369 for (i=0; i<a->A->cmap->n; i++) { 4370 colors[i] = coloring->colors[larray[i]]; 4371 } 4372 ierr = PetscFree(larray);CHKERRQ(ierr); 4373 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4374 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4375 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4376 4377 /* set coloring for off-diagonal portion */ 4378 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4379 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4380 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4381 for (i=0; i<a->B->cmap->n; i++) { 4382 colors[i] = coloring->colors[larray[i]]; 4383 } 4384 ierr = PetscFree(larray);CHKERRQ(ierr); 4385 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4386 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4387 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4388 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4389 PetscFunctionReturn(0); 4390 } 4391 4392 #undef __FUNCT__ 4393 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4394 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4395 { 4396 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4397 PetscErrorCode ierr; 4398 4399 PetscFunctionBegin; 4400 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4401 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4402 PetscFunctionReturn(0); 4403 } 4404 4405 #undef __FUNCT__ 4406 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4407 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4408 { 4409 PetscErrorCode ierr; 4410 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4411 PetscInt *indx; 4412 4413 PetscFunctionBegin; 4414 /* This routine will ONLY return MPIAIJ type matrix */ 4415 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4416 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4417 if (n == PETSC_DECIDE) { 4418 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4419 } 4420 /* Check sum(n) = N */ 4421 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4422 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4423 4424 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4425 rstart -= m; 4426 4427 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4428 for (i=0; i<m; i++) { 4429 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4430 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4431 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4432 } 4433 4434 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4435 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4436 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4437 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4438 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4439 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4440 PetscFunctionReturn(0); 4441 } 4442 4443 #undef __FUNCT__ 4444 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4445 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4446 { 4447 PetscErrorCode ierr; 4448 PetscInt m,N,i,rstart,nnz,Ii; 4449 PetscInt *indx; 4450 PetscScalar *values; 4451 4452 PetscFunctionBegin; 4453 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4454 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4455 for (i=0; i<m; i++) { 4456 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4457 Ii = i + rstart; 4458 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4459 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4460 } 4461 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4462 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4463 PetscFunctionReturn(0); 4464 } 4465 4466 #undef __FUNCT__ 4467 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4468 /*@ 4469 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4470 matrices from each processor 4471 4472 Collective on MPI_Comm 4473 4474 Input Parameters: 4475 + comm - the communicators the parallel matrix will live on 4476 . inmat - the input sequential matrices 4477 . n - number of local columns (or PETSC_DECIDE) 4478 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4479 4480 Output Parameter: 4481 . outmat - the parallel matrix generated 4482 4483 Level: advanced 4484 4485 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4486 4487 @*/ 4488 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4489 { 4490 PetscErrorCode ierr; 4491 PetscMPIInt size; 4492 4493 PetscFunctionBegin; 4494 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4495 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4496 if (size == 1) { 4497 if (scall == MAT_INITIAL_MATRIX) { 4498 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4499 } else { 4500 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4501 } 4502 } else { 4503 if (scall == MAT_INITIAL_MATRIX) { 4504 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4505 } 4506 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4507 } 4508 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "MatFileSplit" 4514 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4515 { 4516 PetscErrorCode ierr; 4517 PetscMPIInt rank; 4518 PetscInt m,N,i,rstart,nnz; 4519 size_t len; 4520 const PetscInt *indx; 4521 PetscViewer out; 4522 char *name; 4523 Mat B; 4524 const PetscScalar *values; 4525 4526 PetscFunctionBegin; 4527 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4528 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4529 /* Should this be the type of the diagonal block of A? 
*/ 4530 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4531 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4532 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4533 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4534 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4535 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4536 for (i=0; i<m; i++) { 4537 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4538 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4539 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4540 } 4541 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4542 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4543 4544 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4545 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4546 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4547 sprintf(name,"%s.%d",outfile,rank); 4548 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4549 ierr = PetscFree(name);CHKERRQ(ierr); 4550 ierr = MatView(B,out);CHKERRQ(ierr); 4551 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4552 ierr = MatDestroy(&B);CHKERRQ(ierr); 4553 PetscFunctionReturn(0); 4554 } 4555 4556 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4557 #undef __FUNCT__ 4558 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4559 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4560 { 4561 PetscErrorCode ierr; 4562 Mat_Merge_SeqsToMPI *merge; 4563 PetscContainer container; 4564 4565 PetscFunctionBegin; 4566 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4567 if (container) { 4568 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4569 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4570 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4571 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4574 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4575 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4576 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4577 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4578 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4579 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4580 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4581 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4582 ierr = PetscFree(merge);CHKERRQ(ierr); 4583 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4584 } 4585 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4586 PetscFunctionReturn(0); 4587 } 4588 4589 #include <../src/mat/utils/freespace.h> 4590 #include <petscbt.h> 4591 4592 #undef __FUNCT__ 4593 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4594 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4595 { 4596 PetscErrorCode ierr; 4597 MPI_Comm comm; 4598 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4599 PetscMPIInt size,rank,taga,*len_s; 4600 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4601 PetscInt proc,m; 4602 PetscInt **buf_ri,**buf_rj; 4603 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4604 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4605 MPI_Request *s_waits,*r_waits; 4606 MPI_Status *status; 4607 MatScalar *aa=a->a; 4608 MatScalar **abuf_r,*ba_i; 4609 Mat_Merge_SeqsToMPI *merge; 4610 PetscContainer container; 4611 4612 PetscFunctionBegin; 4613 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4614 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4615 4616 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4617 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4618 4619 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4620 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4621 4622 bi = merge->bi; 4623 bj = merge->bj; 4624 buf_ri = merge->buf_ri; 4625 buf_rj = merge->buf_rj; 4626 4627 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4628 owners = merge->rowmap->range; 4629 len_s = merge->len_s; 4630 4631 /* send and recv matrix values */ 4632 /*-----------------------------*/ 4633 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4634 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4635 4636 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4637 for (proc=0,k=0; proc<size; proc++) { 4638 if (!len_s[proc]) continue; 4639 i = owners[proc]; 4640 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4641 k++; 4642 } 4643 4644 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4645 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4646 ierr = PetscFree(status);CHKERRQ(ierr); 4647 4648 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4649 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4650 4651 /* insert mat values of mpimat */ 4652 /*----------------------------*/ 4653 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4654 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4655 4656 for (k=0; k<merge->nrecv; k++) { 4657 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4658 nrows = *(buf_ri_k[k]); 4659 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4660 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4661 } 4662 4663 /* set values of ba */ 4664 m = merge->rowmap->n; 4665 for (i=0; i<m; i++) { 4666 arow = owners[rank] + i; 4667 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4668 bnzi = bi[i+1] - bi[i]; 4669 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4670 4671 /* add local non-zero vals of this proc's seqmat into ba */ 4672 anzi = ai[arow+1] - ai[arow]; 4673 aj = a->j + ai[arow]; 4674 aa = a->a + ai[arow]; 4675 nextaj = 0; 4676 for (j=0; nextaj<anzi; j++) { 4677 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4678 ba_i[j] += aa[nextaj++]; 4679 } 4680 } 4681 4682 /* add received vals into ba */ 4683 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4684 /* i-th row */ 4685 if (i == *nextrow[k]) { 4686 anzi = *(nextai[k]+1) - *nextai[k]; 4687 aj = buf_rj[k] + *(nextai[k]); 4688 aa = abuf_r[k] + *(nextai[k]); 4689 nextaj = 0; 4690 for (j=0; nextaj<anzi; j++) { 4691 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4692 ba_i[j] += aa[nextaj++]; 4693 } 4694 } 4695 nextrow[k]++; nextai[k]++; 4696 } 4697 } 4698 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4699 } 4700 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4701 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4702 4703 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4704 ierr = 
PetscFree(abuf_r);CHKERRQ(ierr); 4705 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4706 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4707 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4708 PetscFunctionReturn(0); 4709 } 4710 4711 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4712 4713 #undef __FUNCT__ 4714 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4715 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4716 { 4717 PetscErrorCode ierr; 4718 Mat B_mpi; 4719 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4720 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4721 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4722 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4723 PetscInt len,proc,*dnz,*onz,bs,cbs; 4724 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4725 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4726 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4727 MPI_Status *status; 4728 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4729 PetscBT lnkbt; 4730 Mat_Merge_SeqsToMPI *merge; 4731 PetscContainer container; 4732 4733 PetscFunctionBegin; 4734 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4735 4736 /* make sure it is a PETSc comm */ 4737 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4738 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4739 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4740 4741 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4742 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4743 4744 /* determine row ownership */ 4745 /*---------------------------------------------------------*/ 4746 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4747 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4748 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4749 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4750 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4751 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4752 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4753 4754 m = merge->rowmap->n; 4755 owners = merge->rowmap->range; 4756 4757 /* determine the number of messages to send, their lengths */ 4758 /*---------------------------------------------------------*/ 4759 len_s = merge->len_s; 4760 4761 len = 0; /* length of buf_si[] */ 4762 merge->nsend = 0; 4763 for (proc=0; proc<size; proc++) { 4764 len_si[proc] = 0; 4765 if (proc == rank) { 4766 len_s[proc] = 0; 4767 } else { 4768 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4769 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4770 } 4771 if (len_s[proc]) { 4772 merge->nsend++; 4773 nrows = 0; 4774 for (i=owners[proc]; i<owners[proc+1]; i++) { 4775 if (ai[i+1] > ai[i]) nrows++; 4776 } 4777 len_si[proc] = 2*(nrows+1); 4778 len += len_si[proc]; 4779 } 4780 } 4781 4782 /* determine the number and length of messages to receive for ij-structure */ 4783 /*-------------------------------------------------------------------------*/ 4784 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4785 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4786 4787 /* post the Irecv of j-structure */ 4788 /*-------------------------------*/ 4789 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4790 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4791 4792 /* post the Isend of j-structure */ 4793 /*--------------------------------*/ 4794 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4795 4796 for (proc=0, k=0; proc<size; proc++) { 4797 if (!len_s[proc]) continue; 4798 i = owners[proc]; 4799 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4800 k++; 4801 } 4802 4803 /* receives and sends of j-structure are complete */ 4804 /*------------------------------------------------*/ 4805 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4806 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4807 4808 /* send and recv i-structure */ 4809 /*---------------------------*/ 4810 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4811 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4812 4813 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4814 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4815 for (proc=0,k=0; proc<size; proc++) { 4816 if (!len_s[proc]) continue; 4817 /* form outgoing message for i-structure: 4818 buf_si[0]: nrows to be sent 4819 [1:nrows]: row index (global) 4820 [nrows+1:2*nrows+1]: i-structure index 4821 */ 4822 /*-------------------------------------------*/ 4823 nrows = len_si[proc]/2 - 1; 4824 buf_si_i = buf_si + nrows+1; 4825 buf_si[0] = nrows; 4826 buf_si_i[0] = 0; 4827 nrows = 0; 4828 for (i=owners[proc]; i<owners[proc+1]; i++) { 4829 anzi = ai[i+1] - ai[i]; 4830 if (anzi) { 4831 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4832 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4833 nrows++; 4834 } 4835 } 4836 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4837 k++; 4838 buf_si += len_si[proc]; 4839 } 4840 4841 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4842 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4843 4844 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4845 for (i=0; i<merge->nrecv; i++) { 4846 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4847 } 4848 4849 ierr = PetscFree(len_si);CHKERRQ(ierr); 4850 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4851 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4852 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4853 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4854 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4855 ierr = PetscFree(status);CHKERRQ(ierr); 4856 4857 /* compute a local seq matrix in each processor */ 4858 /*----------------------------------------------*/ 4859 /* allocate bi array and free space for accumulating nonzero column info */ 4860 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4861 bi[0] = 0; 4862 4863 /* create and initialize a linked list */ 4864 nlnk = N+1; 4865 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4866 4867 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4868 len = ai[owners[rank+1]] - ai[owners[rank]]; 4869 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4870 4871 current_space = free_space; 4872 4873 /* determine symbolic info for each local row 
*/
4874   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4875 
4876   for (k=0; k<merge->nrecv; k++) {
4877     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4878     nrows       = *buf_ri_k[k];
4879     nextrow[k]  = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4880     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4881   }
4882 
4883   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4884   len  = 0;
4885   for (i=0; i<m; i++) {
4886     bnzi = 0;
4887     /* add local non-zero cols of this proc's seqmat into lnk */
4888     arow = owners[rank] + i;
4889     anzi = ai[arow+1] - ai[arow];
4890     aj   = a->j + ai[arow];
4891     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4892     bnzi += nlnk;
4893     /* add received col data into lnk */
4894     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4895       if (i == *nextrow[k]) { /* i-th row */
4896         anzi = *(nextai[k]+1) - *nextai[k];
4897         aj   = buf_rj[k] + *nextai[k];
4898         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4899         bnzi += nlnk;
4900         nextrow[k]++; nextai[k]++;
4901       }
4902     }
4903     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4904 
4905     /* if free space is not available, make more free space */
4906     if (current_space->local_remaining<bnzi) {
4907       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4908       nspacedouble++;
4909     }
4910     /* copy data into free space, then initialize lnk */
4911     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4912     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4913 
4914     current_space->array           += bnzi;
4915     current_space->local_used      += bnzi;
4916     current_space->local_remaining -= bnzi;
4917 
4918     bi[i+1] = bi[i] + bnzi;
4919   }
4920 
4921   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4922 
4923   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4924   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4925   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4926 
4927   /* create symbolic parallel matrix B_mpi */
4928   /*---------------------------------------*/
4929   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4930   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4931   if (n==PETSC_DECIDE) {
4932     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4933   } else {
4934     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4935   }
4936   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4937   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4938   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4939   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4940   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4941 
4942   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4943   B_mpi->assembled    = PETSC_FALSE;
4944   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4945   merge->bi           = bi;
4946   merge->bj           = bj;
4947   merge->buf_ri       = buf_ri;
4948   merge->buf_rj       = buf_rj;
4949   merge->coi          = NULL;
4950   merge->coj          = NULL;
4951   merge->owners_co    = NULL;
4952 
4953   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4954 
4955   /* attach the supporting struct to B_mpi for reuse */
4956   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4957   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4958   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4959   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4960   *mpimat = B_mpi;
4961 
4962   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4963   PetscFunctionReturn(0);
4964 }
4965 
4966 #undef __FUNCT__
4967 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4968 /*@C
4969       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4970                  matrices from each processor
4971 
4972    Collective on MPI_Comm
4973 
4974    Input Parameters:
4975 +    comm - the communicator the parallel matrix will live on
4976 .    seqmat - the input sequential matrix
4977 .    m - number of local rows (or PETSC_DECIDE)
4978 .    n - number of local columns (or PETSC_DECIDE)
4979 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4980 
4981    Output Parameter:
4982 .    mpimat - the parallel matrix generated
4983 
4984     Level: advanced
4985 
4986    Notes:
4987      The dimensions of the sequential matrix in each processor MUST be the same.
4988      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4989      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4990 @*/
4991 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4992 {
4993   PetscErrorCode ierr;
4994   PetscMPIInt    size;
4995 
4996   PetscFunctionBegin;
4997   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4998   if (size == 1) {
4999     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5000     if (scall == MAT_INITIAL_MATRIX) {
5001       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5002     } else {
5003       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5004     }
5005     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5006     PetscFunctionReturn(0);
5007   }
5008   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5009   if (scall == MAT_INITIAL_MATRIX) {
5010     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5011   }
5012   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5013   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5014   PetscFunctionReturn(0);
5015 }
5016 
5017 #undef __FUNCT__
5018 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5019 /*@
5020      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5021           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5022           with MatGetSize()
5023 
5024     Not Collective
5025 
5026    Input Parameters:
5027 +    A - the matrix
5028 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5029 
5030    Output Parameter:
5031 .
A_loc - the local sequential matrix generated 5032 5033 Level: developer 5034 5035 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5036 5037 @*/ 5038 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5039 { 5040 PetscErrorCode ierr; 5041 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5042 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5043 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5044 MatScalar *aa=a->a,*ba=b->a,*cam; 5045 PetscScalar *ca; 5046 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5047 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5048 PetscBool match; 5049 5050 PetscFunctionBegin; 5051 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5052 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5053 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5054 if (scall == MAT_INITIAL_MATRIX) { 5055 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5056 ci[0] = 0; 5057 for (i=0; i<am; i++) { 5058 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5059 } 5060 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5061 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5062 k = 0; 5063 for (i=0; i<am; i++) { 5064 ncols_o = bi[i+1] - bi[i]; 5065 ncols_d = ai[i+1] - ai[i]; 5066 /* off-diagonal portion of A */ 5067 for (jo=0; jo<ncols_o; jo++) { 5068 col = cmap[*bj]; 5069 if (col >= cstart) break; 5070 cj[k] = col; bj++; 5071 ca[k++] = *ba++; 5072 } 5073 /* diagonal portion of A */ 5074 for (j=0; j<ncols_d; j++) { 5075 cj[k] = cstart + *aj++; 5076 ca[k++] = *aa++; 5077 } 5078 /* off-diagonal portion of A */ 5079 for (j=jo; j<ncols_o; j++) { 5080 cj[k] = cmap[*bj++]; 5081 ca[k++] = *ba++; 5082 } 5083 } 5084 /* put together the new matrix */ 5085 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5086 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5087 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5088 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5089 mat->free_a = PETSC_TRUE; 5090 mat->free_ij = PETSC_TRUE; 5091 mat->nonew = 0; 5092 } else if (scall == MAT_REUSE_MATRIX) { 5093 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5094 ci = mat->i; cj = mat->j; cam = mat->a; 5095 for (i=0; i<am; i++) { 5096 /* off-diagonal portion of A */ 5097 ncols_o = bi[i+1] - bi[i]; 5098 for (jo=0; jo<ncols_o; jo++) { 5099 col = cmap[*bj]; 5100 if (col >= cstart) break; 5101 *cam++ = *ba++; bj++; 5102 } 5103 /* diagonal portion of A */ 5104 ncols_d = ai[i+1] - ai[i]; 5105 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5106 /* off-diagonal portion of A */ 5107 for (j=jo; j<ncols_o; j++) { 5108 *cam++ = *ba++; bj++; 5109 } 5110 } 5111 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5112 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5113 PetscFunctionReturn(0); 5114 } 5115 5116 #undef __FUNCT__ 5117 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5118 /*@C 5119 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5120 5121 Not Collective 5122 5123 Input Parameters: 5124 + A - the matrix 5125 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5126 - row, col - index sets of rows and columns to extract (or NULL) 5127 5128 Output Parameter: 5129 . 
A_loc - the local sequential matrix generated 5130 5131 Level: developer 5132 5133 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5134 5135 @*/ 5136 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5137 { 5138 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5139 PetscErrorCode ierr; 5140 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5141 IS isrowa,iscola; 5142 Mat *aloc; 5143 PetscBool match; 5144 5145 PetscFunctionBegin; 5146 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5147 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5148 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5149 if (!row) { 5150 start = A->rmap->rstart; end = A->rmap->rend; 5151 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5152 } else { 5153 isrowa = *row; 5154 } 5155 if (!col) { 5156 start = A->cmap->rstart; 5157 cmap = a->garray; 5158 nzA = a->A->cmap->n; 5159 nzB = a->B->cmap->n; 5160 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5161 ncols = 0; 5162 for (i=0; i<nzB; i++) { 5163 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5164 else break; 5165 } 5166 imark = i; 5167 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5168 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5169 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5170 } else { 5171 iscola = *col; 5172 } 5173 if (scall != MAT_INITIAL_MATRIX) { 5174 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5175 aloc[0] = *A_loc; 5176 } 5177 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5178 *A_loc = aloc[0]; 5179 ierr = PetscFree(aloc);CHKERRQ(ierr); 5180 if (!row) { 5181 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5182 } 5183 if (!col) { 5184 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5185 } 5186 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5187 PetscFunctionReturn(0); 5188 } 5189 5190 #undef __FUNCT__ 5191 #define __FUNCT__ "MatGetBrowsOfAcols" 5192 /*@C 5193 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5194 5195 Collective on Mat 5196 5197 Input Parameters: 5198 + A,B - the matrices in mpiaij format 5199 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5200 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5201 5202 Output Parameter: 5203 + rowb, colb - index sets of rows and columns of B to extract 5204 - B_seq - the sequential matrix generated 5205 5206 Level: developer 5207 5208 @*/ 5209 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5210 { 5211 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5212 PetscErrorCode ierr; 5213 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5214 IS isrowb,iscolb; 5215 Mat *bseq=NULL; 5216 5217 PetscFunctionBegin; 5218 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5219 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5220 } 5221 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5222 5223 if (scall == MAT_INITIAL_MATRIX) { 5224 start = A->cmap->rstart; 5225 cmap = a->garray; 5226 nzA = a->A->cmap->n; 5227 nzB = a->B->cmap->n; 5228 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5229 ncols = 0; 5230 for (i=0; i<nzB; i++) { /* row < local row index */ 5231 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5232 else break; 5233 } 5234 imark = i; 5235 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5236 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5237 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5238 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5239 } else { 5240 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5241 isrowb = *rowb; iscolb = *colb; 5242 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5243 bseq[0] = *B_seq; 5244 } 5245 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5246 *B_seq = bseq[0]; 5247 ierr = PetscFree(bseq);CHKERRQ(ierr); 5248 if (!rowb) { 5249 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5250 } else { 5251 *rowb = isrowb; 5252 } 5253 if (!colb) { 5254 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5255 } else { 5256 *colb = iscolb; 5257 } 5258 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5259 PetscFunctionReturn(0); 5260 } 5261 5262 #undef __FUNCT__ 5263 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5264 /* 5265 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5266 of the OFF-DIAGONAL portion of local A 5267 5268 Collective on Mat 5269 5270 Input Parameters: 5271 + A,B - the matrices in mpiaij format 5272 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5273 5274 Output Parameter: 5275 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5276 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5277 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5278 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5279 5280 Level: developer 5281 5282 */ 5283 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5284 { 5285 VecScatter_MPI_General *gen_to,*gen_from; 5286 PetscErrorCode ierr; 5287 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5288 Mat_SeqAIJ *b_oth; 5289 VecScatter ctx =a->Mvctx; 5290 MPI_Comm comm; 5291 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5292 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5293 PetscScalar *rvalues,*svalues; 5294 MatScalar *b_otha,*bufa,*bufA; 5295 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5296 MPI_Request *rwaits = NULL,*swaits = NULL; 5297 MPI_Status *sstatus,rstatus; 5298 PetscMPIInt jj; 5299 PetscInt *cols,sbs,rbs; 5300 PetscScalar *vals; 5301 5302 PetscFunctionBegin; 5303 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5304 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5305 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5306 } 5307 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5308 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5309 5310 gen_to = (VecScatter_MPI_General*)ctx->todata; 5311 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5312 rvalues = gen_from->values; /* holds the length of receiving row */ 5313 svalues = gen_to->values; /* holds the length of sending row */ 5314 nrecvs = gen_from->n; 5315 nsends = gen_to->n; 5316 5317 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5318 srow = gen_to->indices; /* local row index to be sent */ 5319 sstarts = gen_to->starts; 5320 sprocs = gen_to->procs; 5321 sstatus = gen_to->sstatus; 5322 sbs = gen_to->bs; 5323 rstarts = gen_from->starts; 5324 rprocs = gen_from->procs; 5325 rbs = gen_from->bs; 5326 5327 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5328 if (scall == MAT_INITIAL_MATRIX) { 5329 /* i-array */ 5330 /*---------*/ 5331 /* post receives */ 5332 for (i=0; i<nrecvs; i++) { 5333 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5334 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5335 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5336 } 5337 5338 /* pack the outgoing message */ 5339 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5340 5341 sstartsj[0] = 0; 5342 rstartsj[0] = 0; 5343 len = 0; /* total length of j or a array to be sent */ 5344 k = 0; 5345 for (i=0; i<nsends; i++) { 5346 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5347 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5348 for (j=0; j<nrows; j++) { 5349 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5350 for (l=0; l<sbs; l++) { 5351 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5352 5353 rowlen[j*sbs+l] = ncols; 5354 5355 len += ncols; 5356 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5357 } 5358 k++; 5359 } 5360 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5361 5362 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5363 } 5364 /* recvs and 
sends of i-array are completed */
5365     i = nrecvs;
5366     while (i--) {
5367       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5368     }
5369     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5370 
5371     /* allocate buffers for sending j and a arrays */
5372     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5373     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5374 
5375     /* create i-array of B_oth */
5376     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5377 
5378     b_othi[0] = 0;
5379     len = 0; /* total length of j or a array to be received */
5380     k = 0;
5381     for (i=0; i<nrecvs; i++) {
5382       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5383       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5384       for (j=0; j<nrows; j++) {
5385         b_othi[k+1] = b_othi[k] + rowlen[j];
5386         len += rowlen[j]; k++;
5387       }
5388       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5389     }
5390 
5391     /* allocate space for j and a arrays of B_oth */
5392     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5393     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5394 
5395     /* j-array */
5396     /*---------*/
5397     /* post receives of j-array */
5398     for (i=0; i<nrecvs; i++) {
5399       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5400       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5401     }
5402 
5403     /* pack the outgoing message j-array */
5404     k = 0;
5405     for (i=0; i<nsends; i++) {
5406       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5407       bufJ  = bufj+sstartsj[i];
5408       for (j=0; j<nrows; j++) {
5409         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5410         for (ll=0; ll<sbs; ll++) {
5411           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5412           for (l=0; l<ncols; l++) {
5413             *bufJ++ = cols[l];
5414           }
5415           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5416         }
5417       }
5418       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5419     }
5420 
5421     /* recvs and sends of j-array are completed */
5422     i = nrecvs;
5423     while (i--) {
5424       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5425     }
5426     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5427   } else if (scall == MAT_REUSE_MATRIX) {
5428     sstartsj = *startsj_s;
5429     rstartsj = *startsj_r;
5430     bufa     = *bufa_ptr;
5431     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5432     b_otha   = b_oth->a;
5433   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5434 
5435   /* a-array */
5436   /*---------*/
5437   /* post receives of a-array */
5438   for (i=0; i<nrecvs; i++) {
5439     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5440     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5441   }
5442 
5443   /* pack the outgoing message a-array */
5444   k = 0;
5445   for (i=0; i<nsends; i++) {
5446     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5447     bufA  = bufa+sstartsj[i];
5448     for (j=0; j<nrows; j++) {
5449       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5450       for (ll=0; ll<sbs; ll++) {
5451         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5452         for (l=0; l<ncols; l++) {
5453           *bufA++ = vals[l];
5454         }
5455         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5456       }
5457 } 5458 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5459 } 5460 /* recvs and sends of a-array are completed */ 5461 i = nrecvs; 5462 while (i--) { 5463 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5464 } 5465 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5466 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5467 5468 if (scall == MAT_INITIAL_MATRIX) { 5469 /* put together the new matrix */ 5470 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5471 5472 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5473 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5474 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5475 b_oth->free_a = PETSC_TRUE; 5476 b_oth->free_ij = PETSC_TRUE; 5477 b_oth->nonew = 0; 5478 5479 ierr = PetscFree(bufj);CHKERRQ(ierr); 5480 if (!startsj_s || !bufa_ptr) { 5481 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5482 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5483 } else { 5484 *startsj_s = sstartsj; 5485 *startsj_r = rstartsj; 5486 *bufa_ptr = bufa; 5487 } 5488 } 5489 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5490 PetscFunctionReturn(0); 5491 } 5492 5493 #undef __FUNCT__ 5494 #define __FUNCT__ "MatGetCommunicationStructs" 5495 /*@C 5496 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5497 5498 Not Collective 5499 5500 Input Parameters: 5501 . A - The matrix in mpiaij format 5502 5503 Output Parameter: 5504 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5505 . colmap - A map from global column index to local index into lvec 5506 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5507 5508 Level: developer 5509 5510 @*/ 5511 #if defined(PETSC_USE_CTABLE) 5512 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5513 #else 5514 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5515 #endif 5516 { 5517 Mat_MPIAIJ *a; 5518 5519 PetscFunctionBegin; 5520 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5521 PetscValidPointer(lvec, 2); 5522 PetscValidPointer(colmap, 3); 5523 PetscValidPointer(multScatter, 4); 5524 a = (Mat_MPIAIJ*) A->data; 5525 if (lvec) *lvec = a->lvec; 5526 if (colmap) *colmap = a->colmap; 5527 if (multScatter) *multScatter = a->Mvctx; 5528 PetscFunctionReturn(0); 5529 } 5530 5531 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5532 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5533 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5534 5535 #undef __FUNCT__ 5536 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5537 /* 5538 Computes (B'*A')' since computing B*A directly is untenable 5539 5540 n p p 5541 ( ) ( ) ( ) 5542 m ( A ) * n ( B ) = m ( C ) 5543 ( ) ( ) ( ) 5544 5545 */ 5546 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5547 { 5548 PetscErrorCode ierr; 5549 Mat At,Bt,Ct; 5550 5551 PetscFunctionBegin; 5552 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5553 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5554 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5555 ierr = 
MatDestroy(&At);CHKERRQ(ierr); 5556 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5557 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5558 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5559 PetscFunctionReturn(0); 5560 } 5561 5562 #undef __FUNCT__ 5563 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5564 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5565 { 5566 PetscErrorCode ierr; 5567 PetscInt m=A->rmap->n,n=B->cmap->n; 5568 Mat Cmat; 5569 5570 PetscFunctionBegin; 5571 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5572 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5573 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5574 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5575 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5576 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5577 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5578 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5579 5580 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5581 5582 *C = Cmat; 5583 PetscFunctionReturn(0); 5584 } 5585 5586 /* ----------------------------------------------------------------*/ 5587 #undef __FUNCT__ 5588 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5589 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5590 { 5591 PetscErrorCode ierr; 5592 5593 PetscFunctionBegin; 5594 if (scall == MAT_INITIAL_MATRIX) { 5595 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5596 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5597 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5598 } 5599 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5600 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5601 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5602 PetscFunctionReturn(0); 5603 } 5604 5605 #if defined(PETSC_HAVE_MUMPS) 5606 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5607 #endif 5608 #if defined(PETSC_HAVE_PASTIX) 5609 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5610 #endif 5611 #if defined(PETSC_HAVE_SUPERLU_DIST) 5612 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5613 #endif 5614 #if defined(PETSC_HAVE_CLIQUE) 5615 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5616 #endif 5617 5618 /*MC 5619 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5620 5621 Options Database Keys: 5622 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5623 5624 Level: beginner 5625 5626 .seealso: MatCreateAIJ() 5627 M*/ 5628 5629 #undef __FUNCT__ 5630 #define __FUNCT__ "MatCreate_MPIAIJ" 5631 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5632 { 5633 Mat_MPIAIJ *b; 5634 PetscErrorCode ierr; 5635 PetscMPIInt size; 5636 5637 PetscFunctionBegin; 5638 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5639 5640 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5641 B->data = (void*)b; 5642 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5643 B->assembled = PETSC_FALSE; 5644 B->insertmode = NOT_SET_VALUES; 5645 b->size = size; 5646 5647 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5648 5649 /* build cache for off array entries formed */ 5650 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5651 5652 b->donotstash = PETSC_FALSE; 5653 b->colmap = 0; 5654 b->garray = 0; 5655 b->roworiented = PETSC_TRUE; 5656 5657 /* stuff used for matrix vector multiply */ 5658 b->lvec = NULL; 5659 b->Mvctx = NULL; 5660 5661 /* stuff for MatGetRow() */ 5662 b->rowindices = 0; 5663 b->rowvalues = 0; 5664 b->getrowactive = PETSC_FALSE; 5665 5666 /* flexible pointer used in CUSP/CUSPARSE classes */ 5667 b->spptr = NULL; 5668 5669 #if defined(PETSC_HAVE_MUMPS) 5670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5671 #endif 5672 #if defined(PETSC_HAVE_PASTIX) 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5674 #endif 5675 #if defined(PETSC_HAVE_SUPERLU_DIST) 5676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5677 #endif 5678 #if defined(PETSC_HAVE_CLIQUE) 5679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5680 #endif 5681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5690 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5692 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5693 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5694 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5695 PetscFunctionReturn(0); 5696 } 5697 5698 #undef __FUNCT__ 5699 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5700 /*@ 5701 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5702 and "off-diagonal" part of the matrix in CSR format. 5703 5704 Collective on MPI_Comm 5705 5706 Input Parameters: 5707 + comm - MPI communicator 5708 . m - number of local rows (Cannot be PETSC_DECIDE) 5709 . n - This value should be the same as the local size used in creating the 5710 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5711 calculated if N is given) For square matrices n is almost always m. 5712 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5713 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5714 . i - row indices for "diagonal" portion of matrix 5715 . j - column indices 5716 . a - matrix values 5717 . oi - row indices for "off-diagonal" portion of matrix 5718 . oj - column indices 5719 - oa - matrix values 5720 5721 Output Parameter: 5722 . mat - the matrix 5723 5724 Level: advanced 5725 5726 Notes: 5727 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5728 must free the arrays once the matrix has been destroyed and not before. 5729 5730 The i and j indices are 0 based 5731 5732 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5733 5734 This sets local rows and cannot be used to set off-processor values. 5735 5736 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5737 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5738 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5739 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5740 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5741 communication if it is known that only local entries will be set. 
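
    As a rough sketch of the recommended alternative (illustrative only; it assumes each process
    sets entries only in rows it owns, and ncols, cols and vals are placeholders holding the column
    indices and values of one row, not variables defined by PETSc):

.vb
      ierr = MatCreateAIJ(comm,m,n,M,N,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        /* fill ncols, cols[], vals[] for global row i, then insert the row */
        ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve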
5742 5743 .keywords: matrix, aij, compressed row, sparse, parallel 5744 5745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5746 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5747 @*/ 5748 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5749 { 5750 PetscErrorCode ierr; 5751 Mat_MPIAIJ *maij; 5752 5753 PetscFunctionBegin; 5754 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5755 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5756 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5757 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5758 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5759 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5760 maij = (Mat_MPIAIJ*) (*mat)->data; 5761 5762 (*mat)->preallocated = PETSC_TRUE; 5763 5764 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5765 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5766 5767 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5768 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5769 5770 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5771 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5772 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5773 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5774 5775 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5776 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5777 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5778 PetscFunctionReturn(0); 5779 } 5780 5781 /* 5782 Special version for direct calls from Fortran 5783 */ 5784 #include <petsc-private/fortranimpl.h> 5785 5786 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5787 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5788 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5789 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5790 #endif 5791 5792 /* Change these macros so can be used in void function */ 5793 #undef CHKERRQ 5794 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5795 #undef SETERRQ2 5796 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5797 #undef SETERRQ3 5798 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5799 #undef SETERRQ 5800 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5801 5802 #undef __FUNCT__ 5803 #define __FUNCT__ "matsetvaluesmpiaij_" 5804 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5805 { 5806 Mat mat = *mmat; 5807 PetscInt m = *mm, n = *mn; 5808 InsertMode addv = *maddv; 5809 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5810 PetscScalar value; 5811 PetscErrorCode ierr; 5812 5813 MatCheckPreallocated(mat,1); 5814 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5815 5816 #if defined(PETSC_USE_DEBUG) 5817 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5818 #endif 5819 { 5820 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5821 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5822 PetscBool roworiented = aij->roworiented; 5823 5824 /* Some Variables required in the macro */ 5825 Mat A = aij->A; 5826 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5827 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5828 MatScalar *aa = a->a; 5829 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5830 Mat B = aij->B; 5831 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5832 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5833 MatScalar *ba = b->a; 5834 5835 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5836 PetscInt nonew = a->nonew; 5837 MatScalar *ap1,*ap2; 5838 5839 PetscFunctionBegin; 5840 for (i=0; i<m; i++) { 5841 if (im[i] < 0) continue; 5842 #if defined(PETSC_USE_DEBUG) 5843 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5844 #endif 5845 if (im[i] >= rstart && im[i] < rend) { 5846 row = im[i] - rstart; 5847 lastcol1 = -1; 5848 rp1 = aj + ai[row]; 5849 ap1 = aa + ai[row]; 5850 rmax1 = aimax[row]; 5851 nrow1 = ailen[row]; 5852 low1 = 0; 5853 high1 = nrow1; 5854 lastcol2 = -1; 5855 rp2 = bj + bi[row]; 5856 ap2 = ba + bi[row]; 5857 rmax2 = bimax[row]; 5858 nrow2 = bilen[row]; 5859 low2 = 0; 5860 high2 = nrow2; 5861 5862 for (j=0; j<n; j++) { 5863 if (roworiented) value = v[i*n+j]; 5864 else value = v[i+j*m]; 5865 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5866 if (in[j] >= cstart && in[j] < cend) { 5867 col = in[j] - cstart; 5868 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5869 } else if (in[j] < 0) continue; 5870 #if defined(PETSC_USE_DEBUG) 5871 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5872 #endif 5873 else { 5874 if (mat->was_assembled) { 5875 if (!aij->colmap) { 5876 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5877 } 5878 #if defined(PETSC_USE_CTABLE) 5879 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5880 col--; 5881 #else 5882 col = aij->colmap[in[j]] - 1; 5883 #endif 5884 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5885 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5886 col = in[j]; 5887 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5888 B = aij->B; 5889 b = (Mat_SeqAIJ*)B->data; 5890 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5891 rp2 = bj + bi[row]; 5892 ap2 = ba + bi[row]; 5893 rmax2 = bimax[row]; 5894 nrow2 = bilen[row]; 5895 low2 = 0; 5896 high2 = nrow2; 5897 bm = aij->B->rmap->n; 5898 ba = b->a; 5899 } 5900 } else col = in[j]; 5901 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5902 } 5903 } 5904 } else if (!aij->donotstash) { 5905 if (roworiented) { 5906 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5907 } else { 5908 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5909 } 5910 } 5911 } 5912 } 5913 PetscFunctionReturnVoid(); 5914 } 5915 5916