#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; this type also
  automatically switches over to using inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
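/*
   A minimal usage sketch for the advice above (an editor's example, not part of the
   original manual page; assumed values: a square system of global size N with at most
   nz nonzeros per row, error checking beyond CHKERRQ omitted). Calling both
   preallocation routines keeps the same code correct on one process or many:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,nz,PETSC_NULL,nz,PETSC_NULL);CHKERRQ(ierr);
     ... insert entries with MatSetValues(), then ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/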
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
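/*
   A small usage sketch for MatGetColumnNorms() (an editor's example, not from the
   original source): the call is collective, and because the result is summed or maxed
   over the whole communicator, every process must supply an array with room for all
   N global columns.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/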
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
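/*
   Sketch of the protocol used below (an editor's summary, not part of the original
   comment): in the MAT_INITIAL_MATRIX case rank 0 sends each other rank, in order,
   (1) its row lengths, (2) a count nz and the nz column indices, and (3) the nz
   numerical values; every rank then derives diagonal (dlens) and off-diagonal (olens)
   counts, preallocates, and inserts its rows with MatSetValues(). In the reuse case
   only step (3) is repeated, and the values are copied directly into the A (diagonal)
   and B (off-diagonal) blocks using the saved ld[] counts of entries left of the
   diagonal block.
*/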
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array, but it is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value;  \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
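/*
   How the two macros above insert (row,col,value), summarized by the editor: they keep
   the window [low,high) from the previous insertion in the same row (resetting it when
   columns are not given in increasing order), binary-search it down to at most five
   entries, finish with a linear scan, and either update the existing entry or shift the
   tail of the row up by one slot to make room, reallocating first via
   MatSeqXAIJReallocateAIJ() if the row is full. For example, inserting column 7 into a
   row holding columns {3,7,12} overwrites (or adds to) the second slot, while inserting
   column 9 shifts the 12 right and stores 9 in its place.
*/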
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
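/*
   Layout assumed by MatSetValuesRow_MPIAIJ() above (editor's note): the caller passes
   the full row of values in global column order, so v[] is consumed as

      [ B entries left of the diagonal block | all A (diagonal block) entries | remaining B entries ]

   which is why the three PetscMemcpy() calls split v at l and at l + (a->i[row+1]-a->i[row]).
*/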
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
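/*
   Editor's sketch of the set/assemble cycle implemented by the routines above and below
   (names are the standard public API): values for locally owned rows go straight into
   the A or B blocks, while off-process rows are stashed and only communicated during
   assembly, so every rank must participate in the assembly calls:

     ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/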
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
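  /*
     Editor's summary of the communication below (the same pattern is reused in
     MatZeroRowsColumns_MPIAIJ()): each rank buckets the requested global rows by owner,
     learns via PetscMaxSum() how many messages to expect and the longest one, posts
     MPI_Irecv()s, sends each owner its rows with MPI_Isend(), and finally converts the
     received global indices to local ones (subtracting the ownership base) in lrows[].
  */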
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
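/*
   Editor's note on the multiply kernels that follow: for the MPIAIJ layout they compute

      y = A_diag x_local + B_off x_ghost

   where x_ghost holds the needed off-process entries of x, gathered into a->lvec.
   The VecScatterBegin()/VecScatterEnd() pair brackets the local multiply so the
   ghost-value communication overlaps the computation on the diagonal block.
*/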
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
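/*
   Editor's note on the transpose multiply below: it computes

      y = A_diag^T x + scatter_reverse(B_off^T x)

   so the off-process contributions travel in the reverse direction of the forward
   scatter and are accumulated into y with ADD_VALUES; the merged/non-merged branches
   only change whether useful local work can be done between the scatter Begin and End.
*/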
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
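/*
   Editor's summary of the binary layout written below (derived from the code, stated
   here for readability): a four-entry header (MAT_FILE_CLASSID, global rows, global
   columns, global nonzero count), then all row lengths, then all column indices, then
   all numerical values, each section gathered to rank 0 under the PetscViewerFlowControl
   calls so only a bounded number of messages is in flight at a time.
*/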
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1494 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1495 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1497 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1500 PetscInt inodecount,inodelimit,*inodes; 1501 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1502 if (inodes) { 1503 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1504 } else { 1505 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } 1511 } else if (isbinary) { 1512 if (size == 1) { 1513 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1514 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } else if (isdraw) { 1520 PetscDraw draw; 1521 PetscBool isnull; 1522 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1523 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1524 } 1525 1526 if (size == 1) { 1527 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1528 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1529 } else { 1530 /* assemble the entire matrix onto first processor. 
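Note that this gathers every entry of the matrix onto one process, so it is only reasonable for small problems; the guard below refuses plain ASCII output for matrices with more than 1024 rows unless -mat_ascii_output_large is set.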
*/ 1531 Mat A; 1532 Mat_SeqAIJ *Aloc; 1533 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1534 MatScalar *a; 1535 1536 if (mat->rmap->N > 1024) { 1537 PetscBool flg = PETSC_FALSE; 1538 1539 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1540 if (!flg) { 1541 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1542 } 1543 } 1544 1545 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1546 if (!rank) { 1547 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1550 } 1551 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1552 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1553 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1554 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1556 1557 /* copy over the A part */ 1558 Aloc = (Mat_SeqAIJ*)aij->A->data; 1559 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1560 row = mat->rmap->rstart; 1561 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;} 1562 for (i=0; i<m; i++) { 1563 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1564 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1565 } 1566 aj = Aloc->j; 1567 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1568 1569 /* copy over the B part */ 1570 Aloc = (Mat_SeqAIJ*)aij->B->data; 1571 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1572 row = mat->rmap->rstart; 1573 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1574 ct = cols; /* cols is advanced below, so keep ct pointing at the start for PetscFree() */ 1575 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1576 for (i=0; i<m; i++) { 1577 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1578 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1579 } 1580 ierr = PetscFree(ct);CHKERRQ(ierr); 1581 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 /* 1584 Everyone has to call to draw the matrix since the graphics waits are 1585 synchronized across all processors that share the PetscDraw object 1586 */ 1587 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1588 if (!rank) { 1589 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1590 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1591 ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr); 1592 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1593 } 1594 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1595 ierr = MatDestroy(&A);CHKERRQ(ierr); 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatView_MPIAIJ" 1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1603 { 1604 PetscErrorCode ierr; 1605 PetscBool iascii,isdraw,issocket,isbinary; 1606 1607 PetscFunctionBegin; 1608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1609 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1610 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1611 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1612 if (iascii || isdraw || isbinary || issocket) { 1613 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1614 } else { 1615 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatSOR_MPIAIJ" 1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1623 { 1624 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1625 PetscErrorCode ierr; 1626 Vec bb1 = 0; 1627 PetscBool hasop; 1628 1629 PetscFunctionBegin; 1630 if (flag == SOR_APPLY_UPPER) { 1631 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1636 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1637 } 1638 1639 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 1645 while (its--) { 1646 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 1649 /* update rhs: bb1 = bb - B*x */ 1650 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1651 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1652 1653 /* local sweep */ 1654 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1655 } 1656 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1657 if (flag & SOR_ZERO_INITIAL_GUESS) { 1658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1659 its--; 1660 } 1661 while (its--) { 1662 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 1665 /* update rhs: bb1 = bb - B*x */ 1666 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1667 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1668 1669 /* local sweep */ 1670 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1671 } 1672 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1673 if (flag & SOR_ZERO_INITIAL_GUESS) { 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1675 its--; 1676 } 1677 while (its--) { 1678 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1680 1681 /* update rhs: bb1 = bb - B*x */ 1682 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1683 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1684 1685 /* local sweep */ 1686 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1687 } 1688 } else if (flag & SOR_EISENSTAT) { 1689 Vec xx1; 1690 1691 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1692 ierr = 
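/* Eisenstat's trick, as realized in the steps below (hedged summary, not a formal derivation): a local backward sweep with zero initial guess into xx, a scatter of xx into the ghosted lvec, then bb1 = ((omega-2)/omega)*D*xx + bb + B*lvec (using MatMultDiagonalBlock() when available, VecPointwiseMult() with the stored diagonal otherwise), a local forward sweep into xx1, and finally xx += xx1 */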
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1693 1694 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1695 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 if (!mat->diag) { 1697 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1698 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1699 } 1700 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1701 if (hasop) { 1702 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1703 } else { 1704 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1705 } 1706 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1707 1708 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1709 1710 /* local sweep */ 1711 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1712 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1713 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1714 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1715 1716 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatPermute_MPIAIJ" 1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1723 { 1724 MPI_Comm comm; 1725 PetscInt first,local_rowsize,local_colsize; 1726 const PetscInt *rows; 1727 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1732 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1733 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1734 /* collect the global row permutation and invert it */ 1735 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1736 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1737 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1738 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1739 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1740 /* get the local target indices */ 1741 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1742 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1743 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1744 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1745 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1746 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1747 /* the column permutation is so much easier; 1748 make a local version of 'colp' and invert it */ 1749 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1750 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1751 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1752 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1753 /* now we just get the submatrix */ 1754 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1755 /* clean up */ 1756 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1757 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1763 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1764 { 1765 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1766 Mat A = mat->A,B = 
mat->B; 1767 PetscErrorCode ierr; 1768 PetscReal isend[5],irecv[5]; 1769 1770 PetscFunctionBegin; 1771 info->block_size = 1.0; 1772 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1773 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1774 isend[3] = info->memory; isend[4] = info->mallocs; 1775 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1776 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1777 isend[3] += info->memory; isend[4] += info->mallocs; 1778 if (flag == MAT_LOCAL) { 1779 info->nz_used = isend[0]; 1780 info->nz_allocated = isend[1]; 1781 info->nz_unneeded = isend[2]; 1782 info->memory = isend[3]; 1783 info->mallocs = isend[4]; 1784 } else if (flag == MAT_GLOBAL_MAX) { 1785 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1786 info->nz_used = irecv[0]; 1787 info->nz_allocated = irecv[1]; 1788 info->nz_unneeded = irecv[2]; 1789 info->memory = irecv[3]; 1790 info->mallocs = irecv[4]; 1791 } else if (flag == MAT_GLOBAL_SUM) { 1792 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1793 info->nz_used = irecv[0]; 1794 info->nz_allocated = irecv[1]; 1795 info->nz_unneeded = irecv[2]; 1796 info->memory = irecv[3]; 1797 info->mallocs = irecv[4]; 1798 } 1799 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1800 info->fill_ratio_needed = 0; 1801 info->factor_mallocs = 0; 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatSetOption_MPIAIJ" 1807 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1808 { 1809 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1810 PetscErrorCode ierr; 1811 1812 PetscFunctionBegin; 1813 switch (op) { 1814 case MAT_NEW_NONZERO_LOCATIONS: 1815 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1816 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1817 case MAT_KEEP_NONZERO_PATTERN: 1818 case MAT_NEW_NONZERO_LOCATION_ERR: 1819 case MAT_USE_INODES: 1820 case MAT_IGNORE_ZERO_ENTRIES: 1821 MatCheckPreallocated(A,1); 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1824 break; 1825 case MAT_ROW_ORIENTED: 1826 a->roworiented = flg; 1827 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1828 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1829 break; 1830 case MAT_NEW_DIAGONALS: 1831 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1832 break; 1833 case MAT_IGNORE_OFF_PROC_ENTRIES: 1834 a->donotstash = flg; 1835 break; 1836 case MAT_SPD: 1837 A->spd_set = PETSC_TRUE; 1838 A->spd = flg; 1839 if (flg) { 1840 A->symmetric = PETSC_TRUE; 1841 A->structurally_symmetric = PETSC_TRUE; 1842 A->symmetric_set = PETSC_TRUE; 1843 A->structurally_symmetric_set = PETSC_TRUE; 1844 } 1845 break; 1846 case MAT_SYMMETRIC: 1847 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1848 break; 1849 case MAT_STRUCTURALLY_SYMMETRIC: 1850 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1851 break; 1852 case MAT_HERMITIAN: 1853 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1854 break; 1855 case MAT_SYMMETRY_ETERNAL: 1856 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1857 break; 1858 default: 1859 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "MatGetRow_MPIAIJ" 1866 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1867 { 1868 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1869 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1870 PetscErrorCode ierr; 1871 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1872 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1873 PetscInt *cmap,*idx_p; 1874 1875 PetscFunctionBegin; 1876 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1877 mat->getrowactive = PETSC_TRUE; 1878 1879 if (!mat->rowvalues && (idx || v)) { 1880 /* 1881 allocate enough space to hold information from the longest row. 1882 */ 1883 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1884 PetscInt max = 1,tmp; 1885 for (i=0; i<matin->rmap->n; i++) { 1886 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1887 if (max < tmp) { max = tmp; } 1888 } 1889 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1890 } 1891 1892 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1893 lrow = row - rstart; 1894 1895 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1896 if (!v) {pvA = 0; pvB = 0;} 1897 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1898 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1899 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1900 nztot = nzA + nzB; 1901 1902 cmap = mat->garray; 1903 if (v || idx) { 1904 if (nztot) { 1905 /* Sort by increasing column numbers, assuming A and B already sorted */ 1906 PetscInt imark = -1; 1907 if (v) { 1908 *v = v_p = mat->rowvalues; 1909 for (i=0; i<nzB; i++) { 1910 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1911 else break; 1912 } 1913 imark = i; 1914 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1915 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1916 } 1917 if (idx) { 1918 *idx = idx_p = mat->rowindices; 1919 if (imark > -1) { 1920 for (i=0; i<imark; i++) { 1921 idx_p[i] = cmap[cworkB[i]]; 1922 } 1923 } else { 1924 for (i=0; i<nzB; i++) { 1925 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1926 else break; 1927 } 1928 imark = i; 1929 } 1930 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1931 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1932 } 1933 } else { 1934 if (idx) *idx = 0; 1935 if (v) *v = 0; 1936 } 1937 } 1938 *nz = nztot; 1939 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1940 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1941 PetscFunctionReturn(0); 1942 } 1943 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1946 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1947 { 1948 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1949 1950 PetscFunctionBegin; 1951 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1952 aij->getrowactive = PETSC_FALSE; 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "MatNorm_MPIAIJ" 1958 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1959 { 1960 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1961 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1962 PetscErrorCode ierr; 1963 PetscInt i,j,cstart = mat->cmap->rstart; 1964 PetscReal sum = 0.0; 1965 MatScalar *v; 1966 1967 PetscFunctionBegin; 1968 if (aij->size == 1) { 1969 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1970 } 
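/* parallel case: each process accumulates contributions from both its diagonal block (amat) and its off-diagonal block (bmat), and the local results are combined with MPI_Allreduce: MPIU_SUM for the Frobenius and 1-norms, MPIU_MAX for the infinity norm */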
else { 1971 if (type == NORM_FROBENIUS) { 1972 v = amat->a; 1973 for (i=0; i<amat->nz; i++) { 1974 #if defined(PETSC_USE_COMPLEX) 1975 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1976 #else 1977 sum += (*v)*(*v); v++; 1978 #endif 1979 } 1980 v = bmat->a; 1981 for (i=0; i<bmat->nz; i++) { 1982 #if defined(PETSC_USE_COMPLEX) 1983 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1984 #else 1985 sum += (*v)*(*v); v++; 1986 #endif 1987 } 1988 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1989 *norm = PetscSqrtReal(*norm); 1990 } else if (type == NORM_1) { /* max column norm */ 1991 PetscReal *tmp,*tmp2; 1992 PetscInt *jj,*garray = aij->garray; 1993 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1994 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1995 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1996 *norm = 0.0; 1997 v = amat->a; jj = amat->j; 1998 for (j=0; j<amat->nz; j++) { 1999 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 2000 } 2001 v = bmat->a; jj = bmat->j; 2002 for (j=0; j<bmat->nz; j++) { 2003 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2004 } 2005 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2006 for (j=0; j<mat->cmap->N; j++) { 2007 if (tmp2[j] > *norm) *norm = tmp2[j]; 2008 } 2009 ierr = PetscFree(tmp);CHKERRQ(ierr); 2010 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2011 } else if (type == NORM_INFINITY) { /* max row norm */ 2012 PetscReal ntemp = 0.0; 2013 for (j=0; j<aij->A->rmap->n; j++) { 2014 v = amat->a + amat->i[j]; 2015 sum = 0.0; 2016 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2017 sum += PetscAbsScalar(*v); v++; 2018 } 2019 v = bmat->a + bmat->i[j]; 2020 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2021 sum += PetscAbsScalar(*v); v++; 2022 } 2023 if (sum > ntemp) ntemp = sum; 2024 } 2025 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2026 } else { 2027 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2028 } 2029 } 2030 PetscFunctionReturn(0); 2031 } 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatTranspose_MPIAIJ" 2035 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2036 { 2037 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2038 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2039 PetscErrorCode ierr; 2040 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2041 PetscInt cstart=A->cmap->rstart,ncol; 2042 Mat B; 2043 MatScalar *array; 2044 2045 PetscFunctionBegin; 2046 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2047 2048 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2049 ai = Aloc->i; aj = Aloc->j; 2050 bi = Bloc->i; bj = Bloc->j; 2051 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2052 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2053 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2054 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2055 for (i=0; i<ai[ma]; i++){ 2056 d_nnz[aj[i]] ++; 2057 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2058 } 2059 2060 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2061 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2062 ierr = 
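/* the transpose is assembled by inserting each local row of A as a column of B; note the swapped row/column arguments in the MatSetValues() calls below */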
MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2063 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2064 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2065 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2066 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2067 } else { 2068 B = *matout; 2069 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2070 for (i=0; i<ai[ma]; i++){ 2071 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2072 } 2073 } 2074 2075 /* copy over the A part */ 2076 array = Aloc->a; 2077 row = A->rmap->rstart; 2078 for (i=0; i<ma; i++) { 2079 ncol = ai[i+1]-ai[i]; 2080 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2081 row++; array += ncol; aj += ncol; 2082 } 2083 aj = Aloc->j; 2084 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */ 2085 2086 /* copy over the B part */ 2087 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2088 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2089 array = Bloc->a; 2090 row = A->rmap->rstart; 2091 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2092 cols_tmp = cols; 2093 for (i=0; i<mb; i++) { 2094 ncol = bi[i+1]-bi[i]; 2095 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2096 row++; array += ncol; cols_tmp += ncol; 2097 } 2098 ierr = PetscFree(cols);CHKERRQ(ierr); 2099 2100 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2101 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2102 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2103 *matout = B; 2104 } else { 2105 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2106 } 2107 PetscFunctionReturn(0); 2108 } 2109 2110 #undef __FUNCT__ 2111 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2112 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2113 { 2114 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2115 Mat a = aij->A,b = aij->B; 2116 PetscErrorCode ierr; 2117 PetscInt s1,s2,s3; 2118 2119 PetscFunctionBegin; 2120 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2121 if (rr) { 2122 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2123 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2124 /* Overlap communication with computation. 
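The scatter of rr into lvec is begun here, the left scaling of the off-diagonal block and the scaling of the diagonal block proceed while it is in flight, and the VecScatterEnd() below completes it just before the off-diagonal block is right-scaled.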
*/ 2125 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2126 } 2127 if (ll) { 2128 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2129 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2130 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2131 } 2132 /* scale the diagonal block */ 2133 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2134 2135 if (rr) { 2136 /* Do a scatter end and then right scale the off-diagonal block */ 2137 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2138 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2139 } 2140 2141 PetscFunctionReturn(0); 2142 } 2143 2144 #undef __FUNCT__ 2145 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2146 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2147 { 2148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2149 PetscErrorCode ierr; 2150 2151 PetscFunctionBegin; 2152 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2153 PetscFunctionReturn(0); 2154 } 2155 2156 #undef __FUNCT__ 2157 #define __FUNCT__ "MatEqual_MPIAIJ" 2158 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2159 { 2160 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2161 Mat a,b,c,d; 2162 PetscBool flg; 2163 PetscErrorCode ierr; 2164 2165 PetscFunctionBegin; 2166 a = matA->A; b = matA->B; 2167 c = matB->A; d = matB->B; 2168 2169 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2170 if (flg) { 2171 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2172 } 2173 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatCopy_MPIAIJ" 2179 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2180 { 2181 PetscErrorCode ierr; 2182 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2183 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2184 2185 PetscFunctionBegin; 2186 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2187 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2188 /* because of the column compression in the off-processor part of the matrix a->B, 2189 the number of columns in a->B and b->B may be different, hence we cannot call 2190 the MatCopy() directly on the two parts. 
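Each process's off-diagonal block keeps only the nonempty global columns (mapped through garray), so matching global patterns do not guarantee matching local column indices.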
If need be, we can provide a more 2191 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2192 then copying the submatrices */ 2193 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2194 } else { 2195 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2196 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2197 } 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatSetUp_MPIAIJ" 2203 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2204 { 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2214 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2215 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2216 { 2217 PetscInt i,m=Y->rmap->N; 2218 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2219 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2220 const PetscInt *xi = x->i,*yi = y->i; 2221 2222 PetscFunctionBegin; 2223 /* Set the number of nonzeros in the new matrix */ 2224 for(i=0; i<m; i++) { 2225 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2226 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2227 nnz[i] = 0; 2228 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2229 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2230 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2231 nnz[i]++; 2232 } 2233 for (; k<nzy; k++) nnz[i]++; 2234 } 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatAXPY_MPIAIJ" 2240 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2241 { 2242 PetscErrorCode ierr; 2243 PetscInt i; 2244 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2245 PetscBLASInt bnz,one=1; 2246 Mat_SeqAIJ *x,*y; 2247 2248 PetscFunctionBegin; 2249 if (str == SAME_NONZERO_PATTERN) { 2250 PetscScalar alpha = a; 2251 x = (Mat_SeqAIJ *)xx->A->data; 2252 y = (Mat_SeqAIJ *)yy->A->data; 2253 bnz = PetscBLASIntCast(x->nz); 2254 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2255 x = (Mat_SeqAIJ *)xx->B->data; 2256 y = (Mat_SeqAIJ *)yy->B->data; 2257 bnz = PetscBLASIntCast(x->nz); 2258 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2259 } else if (str == SUBSET_NONZERO_PATTERN) { 2260 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2261 2262 x = (Mat_SeqAIJ *)xx->B->data; 2263 y = (Mat_SeqAIJ *)yy->B->data; 2264 if (y->xtoy && y->XtoY != xx->B) { 2265 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2266 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2267 } 2268 if (!y->xtoy) { /* get xtoy */ 2269 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2270 y->XtoY = xx->B; 2271 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2272 } 2273 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2274 } else { 2275 Mat B; 2276 PetscInt *nnz_d,*nnz_o; 2277 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2278 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2279 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2280 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2281 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2282 ierr = 
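/* different nonzero pattern: build a fresh matrix preallocated for the merged patterns of Y and X, compute the sum into it with MatAXPY_BasicWithPreallocation(), then swap it into Y with MatHeaderReplace() */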
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2283 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2284 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2285 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2286 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2287 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2288 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2289 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2290 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2291 } 2292 PetscFunctionReturn(0); 2293 } 2294 2295 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2296 2297 #undef __FUNCT__ 2298 #define __FUNCT__ "MatConjugate_MPIAIJ" 2299 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2300 { 2301 #if defined(PETSC_USE_COMPLEX) 2302 PetscErrorCode ierr; 2303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2304 2305 PetscFunctionBegin; 2306 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2307 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2308 #else 2309 PetscFunctionBegin; 2310 #endif 2311 PetscFunctionReturn(0); 2312 } 2313 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "MatRealPart_MPIAIJ" 2316 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2317 { 2318 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2319 PetscErrorCode ierr; 2320 2321 PetscFunctionBegin; 2322 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2323 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2324 PetscFunctionReturn(0); 2325 } 2326 2327 #undef __FUNCT__ 2328 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2329 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2330 { 2331 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2336 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #ifdef PETSC_HAVE_PBGL 2341 2342 #include <boost/parallel/mpi/bsp_process_group.hpp> 2343 #include <boost/graph/distributed/ilu_default_graph.hpp> 2344 #include <boost/graph/distributed/ilu_0_block.hpp> 2345 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2346 #include <boost/graph/distributed/petsc/interface.hpp> 2347 #include <boost/multi_array.hpp> 2348 #include <boost/parallel/distributed_property_map.hpp> 2349 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2352 /* 2353 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2354 */ 2355 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2356 { 2357 namespace petsc = boost::distributed::petsc; 2358 2359 namespace graph_dist = boost::graph::distributed; 2360 using boost::graph::distributed::ilu_default::process_group_type; 2361 using boost::graph::ilu_permuted; 2362 2363 PetscBool row_identity, col_identity; 2364 PetscContainer c; 2365 PetscInt m, n, M, N; 2366 PetscErrorCode ierr; 2367 2368 PetscFunctionBegin; 2369 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2370 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2371 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2372 if (!row_identity || !col_identity) { 2373 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2374 } 2375 2376 process_group_type pg; 2377 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2378 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2379 lgraph_type& level_graph = *lgraph_p; 2380 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2381 2382 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2383 ilu_permuted(level_graph); 2384 2385 /* put together the new matrix */ 2386 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2387 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2388 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2389 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2390 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2391 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2392 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2394 2395 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 2396 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2397 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2398 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2399 PetscFunctionReturn(0); 2400 } 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2404 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2405 { 2406 PetscFunctionBegin; 2407 PetscFunctionReturn(0); 2408 } 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatSolve_MPIAIJ" 2412 /* 2413 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2414 */ 2415 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2416 { 2417 namespace graph_dist = boost::graph::distributed; 2418 2419 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2420 lgraph_type* lgraph_p; 2421 PetscContainer c; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2426 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2427 ierr = VecCopy(b, x);CHKERRQ(ierr); 2428 2429 PetscScalar* array_x; 2430 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2431 PetscInt sx; 2432 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2433 2434 PetscScalar* array_b; 2435 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2436 PetscInt sb; 2437 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2438 2439 lgraph_type& level_graph = *lgraph_p; 2440 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2441 2442 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2443 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2444 ref_x(array_x, boost::extents[num_vertices(graph)]); 2445 2446 typedef boost::iterator_property_map<array_ref_type::iterator, 2447 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2448 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2449 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2450 2451 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2452 ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); /* return the array access taken above */ 2453 PetscFunctionReturn(0); 2454 } 2455 #endif 2456 2457 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2458 PetscInt nzlocal,nsends,nrecvs; 2459 PetscMPIInt *send_rank,*recv_rank; 2460 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2461 PetscScalar *sbuf_a,**rbuf_a; 2462 PetscErrorCode (*Destroy)(Mat); 2463 } Mat_Redundant; 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2467 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2468 { 2469 PetscErrorCode ierr; 2470 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2471 PetscInt i; 2472 2473 PetscFunctionBegin; 2474 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2475 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2476 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2477 for (i=0; i<redund->nrecvs; i++){ 2478 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2479 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2480 } 2481 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2482 ierr = PetscFree(redund);CHKERRQ(ierr); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatDestroy_MatRedundant" 2488 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2489 { 2490 PetscErrorCode ierr; 2491 PetscContainer container; 2492 Mat_Redundant *redund=PETSC_NULL; 2493 2494 PetscFunctionBegin; 2495 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2496 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2497 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2498 A->ops->destroy = redund->Destroy; 2499 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2500 if (A->ops->destroy) { 2501 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2502 } 2503 PetscFunctionReturn(0); 2504 } 2505 2506 #undef __FUNCT__ 2507 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2508 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2509 { 2510 PetscMPIInt rank,size; 2511 MPI_Comm comm=((PetscObject)mat)->comm; 2512 PetscErrorCode ierr; 2513 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2514 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2515 PetscInt *rowrange=mat->rmap->range; 2516 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2517 Mat A=aij->A,B=aij->B,C=*matredundant; 2518 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2519 PetscScalar *sbuf_a; 2520 PetscInt nzlocal=a->nz+b->nz; 2521 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2522 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2523 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2524 MatScalar *aworkA,*aworkB; 2525 PetscScalar *vals; 2526 PetscMPIInt tag1,tag2,tag3,imdex; 2527 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2528 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2529 MPI_Status recv_status,*send_status; 2530 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2531 PetscInt **rbuf_j=PETSC_NULL; 2532 PetscScalar **rbuf_a=PETSC_NULL; 2533 Mat_Redundant *redund=PETSC_NULL; 2534 PetscContainer container; 2535 2536 PetscFunctionBegin; 2537 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2538 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2539 2540 if (reuse == MAT_REUSE_MATRIX) { 2541 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2542 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2543 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2544 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2545 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2546 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2547 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2548 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2549 2550 nsends = redund->nsends; 2551 nrecvs = redund->nrecvs; 2552 send_rank = redund->send_rank; 2553 recv_rank = redund->recv_rank; 2554 sbuf_nz = redund->sbuf_nz; 2555 rbuf_nz = redund->rbuf_nz; 2556 sbuf_j = redund->sbuf_j; 2557 sbuf_a = redund->sbuf_a; 2558 rbuf_j = redund->rbuf_j; 2559 rbuf_a = redund->rbuf_a; 2560 } 2561 2562 if (reuse == MAT_INITIAL_MATRIX){ 2563 PetscMPIInt subrank,subsize; 2564 PetscInt nleftover,np_subcomm; 2565 /* get the destination processors' id send_rank, nsends and nrecvs */ 2566 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2567 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2568 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2569 np_subcomm = size/nsubcomm; 2570 nleftover = size - nsubcomm*np_subcomm; 2571 nsends = 0; nrecvs = 0; 2572 for (i=0; i<size; i++){ /* i=rank*/ 2573 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2574 send_rank[nsends] = i; nsends++; 2575 recv_rank[nrecvs++] = i; 2576 } 2577 } 2578 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2579 i = size-nleftover-1; 2580 j = 0; 2581 while (j < nsubcomm - nleftover){ 2582 send_rank[nsends++] = i; 2583 i--; j++; 2584 } 2585 } 2586 2587 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2588 for (i=0; i<nleftover; i++){ 2589 recv_rank[nrecvs++] = size-nleftover+i; 2590 } 2591 } 2592 2593 /* allocate sbuf_j, sbuf_a */ 2594 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2595 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2596 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2597 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2598 2599 /* copy mat's local entries into the buffers */ 2600 if (reuse == MAT_INITIAL_MATRIX){ 2601 rownz_max = 0; 2602 rptr = sbuf_j; 2603 cols = sbuf_j + rend-rstart + 1; 2604 vals = sbuf_a; 2605 rptr[0] = 0; 2606 for (i=0; i<rend-rstart; i++){ 2607 row = i + rstart; 2608 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2609 ncols = nzA + nzB; 2610 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2611 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2612 /* load the column indices for this row into cols */ 2613 lwrite = 0; 2614 for (l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2616 vals[lwrite] = aworkB[l]; 2617 cols[lwrite++] = ctmp; 2618 } 2619 } 2620 for (l=0; l<nzA; l++){ 2621 vals[lwrite] = aworkA[l]; 2622 cols[lwrite++] = cstart + cworkA[l]; 2623 } 2624 for (l=0; l<nzB; l++) { 2625 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2626 vals[lwrite] = aworkB[l]; 2627 cols[lwrite++] = ctmp; 2628 } 2629 } 2630 vals += ncols; 2631 cols += ncols; 2632 rptr[i+1] = rptr[i] + ncols; 2633 if (rownz_max < ncols) rownz_max = ncols; 2634 } 2635 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz); 2636 } else { /* only copy matrix values into sbuf_a */ 2637 rptr = sbuf_j; 2638 vals = sbuf_a; 2639 rptr[0] = 0; 2640 for (i=0; i<rend-rstart; i++){ 2641 row = i + 
rstart; 2642 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2643 ncols = nzA + nzB; 2644 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2645 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2646 lwrite = 0; 2647 for (l=0; l<nzB; l++) { 2648 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2649 } 2650 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2651 for (l=0; l<nzB; l++) { 2652 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2653 } 2654 vals += ncols; 2655 rptr[i+1] = rptr[i] + ncols; 2656 } 2657 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2658 2659 /* send nzlocal to others, and recv other's nzlocal */ 2660 /*--------------------------------------------------*/ 2661 if (reuse == MAT_INITIAL_MATRIX){ 2662 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2663 s_waits2 = s_waits3 + nsends; 2664 s_waits1 = s_waits2 + nsends; 2665 r_waits1 = s_waits1 + nsends; 2666 r_waits2 = r_waits1 + nrecvs; 2667 r_waits3 = r_waits2 + nrecvs; 2668 } else { 2669 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2670 r_waits3 = s_waits3 + nsends; 2671 } 2672 2673 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2674 if (reuse == MAT_INITIAL_MATRIX){ 2675 /* get new tags to keep the communication clean */ 2676 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2677 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2678 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2679 2680 /* post receives of other's nzlocal */ 2681 for (i=0; i<nrecvs; i++){ 2682 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2683 } 2684 /* send nzlocal to others */ 2685 for (i=0; i<nsends; i++){ 2686 sbuf_nz[i] = nzlocal; 2687 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2688 } 2689 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2690 count = nrecvs; 2691 while (count) { 2692 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2693 recv_rank[imdex] = recv_status.MPI_SOURCE; 2694 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2695 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2696 2697 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2698 rbuf_nz[imdex] += i + 2; 2699 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2700 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2701 count--; 2702 } 2703 /* wait on sends of nzlocal */ 2704 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2705 /* send mat->i,j to others, and recv from other's */ 2706 /*------------------------------------------------*/ 2707 for (i=0; i<nsends; i++){ 2708 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2709 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2710 } 2711 /* wait on receives of mat->i,j */ 2712 /*------------------------------*/ 2713 count = nrecvs; 2714 while (count) { 2715 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2716 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2717 count--; 2718 } 2719 /* wait on sends of mat->i,j */ 2720 /*---------------------------*/ 2721 if (nsends) { 2722 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2723 } 2724 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2725 2726 /* post receives, send and receive mat->a */ 2727 /*----------------------------------------*/ 2728 for (imdex=0; imdex<nrecvs; imdex++) { 2729 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2730 } 2731 for (i=0; i<nsends; i++){ 2732 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2733 } 2734 count = nrecvs; 2735 while (count) { 2736 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2737 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2738 count--; 2739 } 2740 if (nsends) { 2741 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2742 } 2743 2744 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2745 2746 /* create redundant matrix */ 2747 /*-------------------------*/ 2748 if (reuse == MAT_INITIAL_MATRIX){ 2749 /* compute rownz_max for preallocation */ 2750 for (imdex=0; imdex<nrecvs; imdex++){ 2751 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2752 rptr = rbuf_j[imdex]; 2753 for (i=0; i<j; i++){ 2754 ncols = rptr[i+1] - rptr[i]; 2755 if (rownz_max < ncols) rownz_max = ncols; 2756 } 2757 } 2758 2759 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2760 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2761 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2762 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2763 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2764 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2765 } else { 2766 C = *matredundant; 2767 } 2768 2769 /* insert local matrix entries */ 2770 rptr = sbuf_j; 2771 cols = sbuf_j + rend-rstart + 1; 2772 vals = sbuf_a; 2773 for (i=0; i<rend-rstart; i++){ 2774 row = i + rstart; 2775 ncols = rptr[i+1] - rptr[i]; 2776 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2777 vals += ncols; 2778 cols += ncols; 2779 } 2780 /* insert received matrix entries */ 2781 for (imdex=0; imdex<nrecvs; imdex++){ 2782 rstart = rowrange[recv_rank[imdex]]; 2783 rend = rowrange[recv_rank[imdex]+1]; 2784 rptr = rbuf_j[imdex]; 2785 cols = rbuf_j[imdex] + rend-rstart + 1; 2786 vals = rbuf_a[imdex]; 2787 for (i=0; i<rend-rstart; i++){ 2788 row = i + rstart; 2789 ncols = rptr[i+1] - rptr[i]; 2790 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2791 vals += ncols; 2792 cols += ncols; 2793 } 2794 } 2795 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2796 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2797 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2798 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2799 if (reuse == MAT_INITIAL_MATRIX) { 2800 PetscContainer container; 2801 *matredundant = C; 2802 /* create a supporting struct and attach it to C for reuse */ 2803 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2804 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
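/* The send/recv buffers are attached to C through a PetscContainer so that a later call with MAT_REUSE_MATRIX can recover them via PetscObjectQuery() (see the top of this routine); MatDestroy_MatRedundant() is installed below to free them before dispatching the original destroy routine. Sketch of the intended reuse pattern (caller context assumed, not shown here): MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C); ... change values of mat ... MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C); */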
2805 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2806 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2807 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2808 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2809 2810 redund->nzlocal = nzlocal; 2811 redund->nsends = nsends; 2812 redund->nrecvs = nrecvs; 2813 redund->send_rank = send_rank; 2814 redund->recv_rank = recv_rank; 2815 redund->sbuf_nz = sbuf_nz; 2816 redund->rbuf_nz = rbuf_nz; 2817 redund->sbuf_j = sbuf_j; 2818 redund->sbuf_a = sbuf_a; 2819 redund->rbuf_j = rbuf_j; 2820 redund->rbuf_a = rbuf_a; 2821 2822 redund->Destroy = C->ops->destroy; 2823 C->ops->destroy = MatDestroy_MatRedundant; 2824 } 2825 PetscFunctionReturn(0); 2826 } 2827 2828 #undef __FUNCT__ 2829 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2830 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2831 { 2832 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2833 PetscErrorCode ierr; 2834 PetscInt i,*idxb = 0; 2835 PetscScalar *va,*vb; 2836 Vec vtmp; 2837 2838 PetscFunctionBegin; 2839 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2840 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2841 if (idx) { 2842 for (i=0; i<A->rmap->n; i++) { 2843 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2844 } 2845 } 2846 2847 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2848 if (idx) { 2849 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2850 } 2851 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2852 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2853 2854 for (i=0; i<A->rmap->n; i++){ 2855 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2856 va[i] = vb[i]; 2857 if (idx) idx[i] = a->garray[idxb[i]]; 2858 } 2859 } 2860 2861 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2862 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2863 ierr = PetscFree(idxb);CHKERRQ(ierr); 2864 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2870 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2871 { 2872 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2873 PetscErrorCode ierr; 2874 PetscInt i,*idxb = 0; 2875 PetscScalar *va,*vb; 2876 Vec vtmp; 2877 2878 PetscFunctionBegin; 2879 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2880 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2881 if (idx) { 2882 for (i=0; i<A->rmap->n; i++) { 2883 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2884 } 2885 } 2886 2887 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2888 if (idx) { 2889 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2890 } 2891 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2892 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2893 2894 for (i=0; i<A->rmap->n; i++){ 2895 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2896 va[i] = vb[i]; 2897 if (idx) idx[i] = a->garray[idxb[i]]; 2898 } 2899 } 2900 2901 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2902 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2903 ierr = PetscFree(idxb);CHKERRQ(ierr); 2904 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2905 PetscFunctionReturn(0); 2906 } 2907 2908 #undef __FUNCT__ 2909 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2910 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2911 { 2912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2913 PetscInt n = A->rmap->n; 2914 PetscInt cstart = 
A->cmap->rstart; 2915 PetscInt *cmap = mat->garray; 2916 PetscInt *diagIdx, *offdiagIdx; 2917 Vec diagV, offdiagV; 2918 PetscScalar *a, *diagA, *offdiagA; 2919 PetscInt r; 2920 PetscErrorCode ierr; 2921 2922 PetscFunctionBegin; 2923 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2924 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2925 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2926 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2927 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2928 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2929 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2930 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2931 for(r = 0; r < n; ++r) { 2932 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2933 a[r] = diagA[r]; 2934 idx[r] = cstart + diagIdx[r]; 2935 } else { 2936 a[r] = offdiagA[r]; 2937 idx[r] = cmap[offdiagIdx[r]]; 2938 } 2939 } 2940 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2941 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2942 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2943 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2944 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2945 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2946 PetscFunctionReturn(0); 2947 } 2948 2949 #undef __FUNCT__ 2950 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2951 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2952 { 2953 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2954 PetscInt n = A->rmap->n; 2955 PetscInt cstart = A->cmap->rstart; 2956 PetscInt *cmap = mat->garray; 2957 PetscInt *diagIdx, *offdiagIdx; 2958 Vec diagV, offdiagV; 2959 PetscScalar *a, *diagA, *offdiagA; 2960 PetscInt r; 2961 PetscErrorCode ierr; 2962 2963 PetscFunctionBegin; 2964 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2965 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2966 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2967 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2968 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2969 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2970 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2971 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2972 for(r = 0; r < n; ++r) { 2973 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2974 a[r] = diagA[r]; 2975 idx[r] = cstart + diagIdx[r]; 2976 } else { 2977 a[r] = offdiagA[r]; 2978 idx[r] = cmap[offdiagIdx[r]]; 2979 } 2980 } 2981 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2982 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2983 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2984 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2985 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2986 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2992 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2993 { 2994 PetscErrorCode ierr; 2995 Mat *dummy; 2996 2997 PetscFunctionBegin; 2998 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2999 *newmat = *dummy; 3000 ierr = PetscFree(dummy);CHKERRQ(ierr); 3001 PetscFunctionReturn(0); 3002 } 3003 3004 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3005 3006 #undef __FUNCT__ 
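/* For MPIAIJ the point-block diagonal lies entirely within the local diagonal block a->A, so the inversion is simply delegated to the sequential routine below. Minimal usage sketch (hedged; the caller context, e.g. a point-block Jacobi setup, is an assumption):
     const PetscScalar *values;
     ierr = MatInvertBlockDiagonal(A,&values);CHKERRQ(ierr);
   after which values points to the inverted diagonal blocks owned by this process. */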
3007 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3008 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3009 { 3010 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3011 PetscErrorCode ierr; 3012 3013 PetscFunctionBegin; 3014 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3015 PetscFunctionReturn(0); 3016 } 3017 3018 #undef __FUNCT__ 3019 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3020 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3021 { 3022 PetscErrorCode ierr; 3023 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3024 3025 PetscFunctionBegin; 3026 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3027 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3028 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3029 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3030 PetscFunctionReturn(0); 3031 } 3032 3033 /* -------------------------------------------------------------------*/ 3034 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3035 MatGetRow_MPIAIJ, 3036 MatRestoreRow_MPIAIJ, 3037 MatMult_MPIAIJ, 3038 /* 4*/ MatMultAdd_MPIAIJ, 3039 MatMultTranspose_MPIAIJ, 3040 MatMultTransposeAdd_MPIAIJ, 3041 #ifdef PETSC_HAVE_PBGL 3042 MatSolve_MPIAIJ, 3043 #else 3044 0, 3045 #endif 3046 0, 3047 0, 3048 /*10*/ 0, 3049 0, 3050 0, 3051 MatSOR_MPIAIJ, 3052 MatTranspose_MPIAIJ, 3053 /*15*/ MatGetInfo_MPIAIJ, 3054 MatEqual_MPIAIJ, 3055 MatGetDiagonal_MPIAIJ, 3056 MatDiagonalScale_MPIAIJ, 3057 MatNorm_MPIAIJ, 3058 /*20*/ MatAssemblyBegin_MPIAIJ, 3059 MatAssemblyEnd_MPIAIJ, 3060 MatSetOption_MPIAIJ, 3061 MatZeroEntries_MPIAIJ, 3062 /*24*/ MatZeroRows_MPIAIJ, 3063 0, 3064 #ifdef PETSC_HAVE_PBGL 3065 0, 3066 #else 3067 0, 3068 #endif 3069 0, 3070 0, 3071 /*29*/ MatSetUp_MPIAIJ, 3072 #ifdef PETSC_HAVE_PBGL 3073 0, 3074 #else 3075 0, 3076 #endif 3077 0, 3078 0, 3079 0, 3080 /*34*/ MatDuplicate_MPIAIJ, 3081 0, 3082 0, 3083 0, 3084 0, 3085 /*39*/ MatAXPY_MPIAIJ, 3086 MatGetSubMatrices_MPIAIJ, 3087 MatIncreaseOverlap_MPIAIJ, 3088 MatGetValues_MPIAIJ, 3089 MatCopy_MPIAIJ, 3090 /*44*/ MatGetRowMax_MPIAIJ, 3091 MatScale_MPIAIJ, 3092 0, 3093 0, 3094 MatZeroRowsColumns_MPIAIJ, 3095 /*49*/ MatSetRandom_MPIAIJ, 3096 0, 3097 0, 3098 0, 3099 0, 3100 /*54*/ MatFDColoringCreate_MPIAIJ, 3101 0, 3102 MatSetUnfactored_MPIAIJ, 3103 0, /* MatPermute_MPIAIJ, impl currently broken */ 3104 0, 3105 /*59*/ MatGetSubMatrix_MPIAIJ, 3106 MatDestroy_MPIAIJ, 3107 MatView_MPIAIJ, 3108 0, 3109 0, 3110 /*64*/ 0, 3111 0, 3112 0, 3113 0, 3114 0, 3115 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3116 MatGetRowMinAbs_MPIAIJ, 3117 0, 3118 MatSetColoring_MPIAIJ, 3119 #if defined(PETSC_HAVE_ADIC) 3120 MatSetValuesAdic_MPIAIJ, 3121 #else 3122 0, 3123 #endif 3124 MatSetValuesAdifor_MPIAIJ, 3125 /*75*/ MatFDColoringApply_AIJ, 3126 0, 3127 0, 3128 0, 3129 MatFindZeroDiagonals_MPIAIJ, 3130 /*80*/ 0, 3131 0, 3132 0, 3133 /*83*/ MatLoad_MPIAIJ, 3134 0, 3135 0, 3136 0, 3137 0, 3138 0, 3139 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3140 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3141 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3142 MatPtAP_Basic, 3143 MatPtAPSymbolic_MPIAIJ, 3144 /*94*/ MatPtAPNumeric_MPIAIJ, 3145 0, 3146 0, 3147 0, 3148 0, 3149 /*99*/ 0, 3150 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3151 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3152 MatConjugate_MPIAIJ, 3153 0, 3154 /*104*/MatSetValuesRow_MPIAIJ, 3155 MatRealPart_MPIAIJ, 3156 MatImaginaryPart_MPIAIJ, 3157 0, 3158 0, 3159 /*109*/0, 3160 MatGetRedundantMatrix_MPIAIJ, 3161 MatGetRowMin_MPIAIJ, 3162 0, 3163 0, 3164 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3165 0, 3166 0, 3167 0, 3168 0, 
3169 /*119*/0, 3170 0, 3171 0, 3172 0, 3173 MatGetMultiProcBlock_MPIAIJ, 3174 /*124*/MatFindNonzeroRows_MPIAIJ, 3175 MatGetColumnNorms_MPIAIJ, 3176 MatInvertBlockDiagonal_MPIAIJ, 3177 0, 3178 MatGetSubMatricesParallel_MPIAIJ, 3179 /*129*/0, 3180 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3181 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3182 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3183 0, 3184 /*134*/0, 3185 0, 3186 0, 3187 0, 3188 0 3189 }; 3190 3191 /* ----------------------------------------------------------------------------------------*/ 3192 3193 EXTERN_C_BEGIN 3194 #undef __FUNCT__ 3195 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3196 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3197 { 3198 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3199 PetscErrorCode ierr; 3200 3201 PetscFunctionBegin; 3202 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3203 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3204 PetscFunctionReturn(0); 3205 } 3206 EXTERN_C_END 3207 3208 EXTERN_C_BEGIN 3209 #undef __FUNCT__ 3210 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3211 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3212 { 3213 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3214 PetscErrorCode ierr; 3215 3216 PetscFunctionBegin; 3217 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3218 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3219 PetscFunctionReturn(0); 3220 } 3221 EXTERN_C_END 3222 3223 EXTERN_C_BEGIN 3224 #undef __FUNCT__ 3225 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3226 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3227 { 3228 Mat_MPIAIJ *b; 3229 PetscErrorCode ierr; 3230 PetscInt i; 3231 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3232 3233 PetscFunctionBegin; 3234 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3235 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3236 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3237 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3238 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3239 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3240 3241 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3242 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3243 if (d_nnz) { 3244 for (i=0; i<B->rmap->n; i++) { 3245 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3246 } 3247 } 3248 if (o_nnz) { 3249 for (i=0; i<B->rmap->n; i++) { 3250 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3251 } 3252 } 3253 b = (Mat_MPIAIJ*)B->data; 3254 3255 if (!B->preallocated) { 3256 /* Explicitly create 2 MATSEQAIJ matrices. 
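   b->A holds the diagonal portion (local rows by local columns) and b->B the
   off-diagonal portion; b->B is created with the full global column width here
   and is compacted to just the nonzero off-process columns (recorded in
   garray) when the matrix is assembled.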
*/ 3257 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3258 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3259 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3260 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3261 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3262 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3263 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3264 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3265 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3266 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3267 } 3268 3269 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3270 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3271 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3272 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3273 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3274 B->preallocated = PETSC_TRUE; 3275 PetscFunctionReturn(0); 3276 } 3277 EXTERN_C_END 3278 3279 #undef __FUNCT__ 3280 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3281 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3282 { 3283 Mat mat; 3284 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3285 PetscErrorCode ierr; 3286 3287 PetscFunctionBegin; 3288 *newmat = 0; 3289 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3290 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3291 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3292 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3293 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3294 a = (Mat_MPIAIJ*)mat->data; 3295 3296 mat->factortype = matin->factortype; 3297 mat->rmap->bs = matin->rmap->bs; 3298 mat->cmap->bs = matin->cmap->bs; 3299 mat->assembled = PETSC_TRUE; 3300 mat->insertmode = NOT_SET_VALUES; 3301 mat->preallocated = PETSC_TRUE; 3302 3303 a->size = oldmat->size; 3304 a->rank = oldmat->rank; 3305 a->donotstash = oldmat->donotstash; 3306 a->roworiented = oldmat->roworiented; 3307 a->rowindices = 0; 3308 a->rowvalues = 0; 3309 a->getrowactive = PETSC_FALSE; 3310 3311 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3312 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3313 3314 if (oldmat->colmap) { 3315 #if defined (PETSC_USE_CTABLE) 3316 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3317 #else 3318 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3319 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3320 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3321 #endif 3322 } else a->colmap = 0; 3323 if (oldmat->garray) { 3324 PetscInt len; 3325 len = oldmat->B->cmap->n; 3326 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3327 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3328 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3329 } else a->garray = 0; 3330 3331 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3332 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3333 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3334 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3335 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3336 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3337 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3338 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3339 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3340 *newmat = mat;
3341 PetscFunctionReturn(0);
3342 }
3343
3344
3345
3346 #undef __FUNCT__
3347 #define __FUNCT__ "MatLoad_MPIAIJ"
3348 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3349 {
3350 PetscScalar *vals,*svals;
3351 MPI_Comm comm = ((PetscObject)viewer)->comm;
3352 PetscErrorCode ierr;
3353 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3354 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3355 PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3356 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3357 PetscInt cend,cstart,n,*rowners,sizesset=1;
3358 int fd;
3359 PetscInt bs = 1;
3360
3361 PetscFunctionBegin;
3362 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3363 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3364 if (!rank) {
3365 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3366 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3367 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3368 }
3369
3370 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3371 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3372 ierr = PetscOptionsEnd();CHKERRQ(ierr);
3373
3374 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3375
3376 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3377 M = header[1]; N = header[2];
3378 /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3379 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3380 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3381
3382 /* If global sizes are set, check that they are consistent with those given in the file */
3383 if (sizesset) {
3384 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3385 }
3386 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3387 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3388
3389 /* determine ownership of all (block) rows */
3390 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
3391 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3392 else m = newMat->rmap->n; /* Set by user */
3393
3394 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3395 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3396
3397 /* First process needs enough room for the process with the most rows */
3398 if (!rank) {
3399 mmax = rowners[1];
3400 for (i=2; i<=size; i++) {
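/* rowners[1..size] still hold per-process row counts here; the prefix sums below build the ownership offsets */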
mmax = PetscMax(mmax, rowners[i]);
3402 }
3403 } else mmax = m;
3404
3405 rowners[0] = 0;
3406 for (i=2; i<=size; i++) {
3407 rowners[i] += rowners[i-1];
3408 }
3409 rstart = rowners[rank];
3410 rend = rowners[rank+1];
3411
3412 /* distribute row lengths to all processors */
3413 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3414 if (!rank) {
3415 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3416 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3417 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3418 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3419 for (j=0; j<m; j++) {
3420 procsnz[0] += ourlens[j];
3421 }
3422 for (i=1; i<size; i++) {
3423 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3424 /* calculate the number of nonzeros on each processor */
3425 for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3426 procsnz[i] += rowlengths[j];
3427 }
3428 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3429 }
3430 ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3431 } else {
3432 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3433 }
3434
3435 if (!rank) {
3436 /* determine max buffer needed and allocate it */
3437 maxnz = 0;
3438 for (i=0; i<size; i++) {
3439 maxnz = PetscMax(maxnz,procsnz[i]);
3440 }
3441 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3442
3443 /* read in my part of the matrix column indices */
3444 nz = procsnz[0];
3445 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3446 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3447
3448 /* read in everyone else's and ship it off */
3449 for (i=1; i<size; i++) {
3450 nz = procsnz[i];
3451 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3452 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3453 }
3454 ierr = PetscFree(cols);CHKERRQ(ierr);
3455 } else {
3456 /* determine buffer space needed for message */
3457 nz = 0;
3458 for (i=0; i<m; i++) {
3459 nz += ourlens[i];
3460 }
3461 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3462
3463 /* receive message of column indices */
3464 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3465 }
3466
3467 /* determine column ownership if matrix is not square */
3468 if (N != M) {
3469 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3470 else n = newMat->cmap->n;
3471 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3472 cstart = cend - n;
3473 } else {
3474 cstart = rstart;
3475 cend = rend;
3476 n = cend - cstart;
3477 }
3478
3479 /* loop over local rows, determining number of off-diagonal entries */
3480 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3481 jj = 0;
3482 for (i=0; i<m; i++) {
3483 for (j=0; j<ourlens[i]; j++) {
3484 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3485 jj++;
3486 }
3487 }
3488
3489 for (i=0; i<m; i++) {
3490 ourlens[i] -= offlens[i];
3491 }
3492 if (!sizesset) {
3493 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3494 }
3495
3496 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3497
3498 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3499
3500 for (i=0; i<m; i++) {
3501 ourlens[i] += offlens[i];
3502 }
3503
3504 if (!rank) {
3505 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3506
3507 /* read in my part of the matrix numerical values */
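/* rank 0 streams the numerical values the same way as the column indices:
   it reads its own chunk from the file, then for each other process reads that
   process's chunk and ships it with MPIULong_Send() (PETSc's helper for
   messages whose length may exceed a single MPI send); every other rank posts
   one matching MPIULong_Recv() in the else-branch below */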
3508 nz = procsnz[0];
3509 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3510
3511 /* insert into matrix */
3512 jj = rstart;
3513 smycols = mycols;
3514 svals = vals;
3515 for (i=0; i<m; i++) {
3516 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3517 smycols += ourlens[i];
3518 svals += ourlens[i];
3519 jj++;
3520 }
3521
3522 /* read in other processors' values and ship them out */
3523 for (i=1; i<size; i++) {
3524 nz = procsnz[i];
3525 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3526 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3527 }
3528 ierr = PetscFree(procsnz);CHKERRQ(ierr);
3529 } else {
3530 /* receive numeric values */
3531 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3532
3533 /* receive message of values */
3534 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3535
3536 /* insert into matrix */
3537 jj = rstart;
3538 smycols = mycols;
3539 svals = vals;
3540 for (i=0; i<m; i++) {
3541 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3542 smycols += ourlens[i];
3543 svals += ourlens[i];
3544 jj++;
3545 }
3546 }
3547 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3548 ierr = PetscFree(vals);CHKERRQ(ierr);
3549 ierr = PetscFree(mycols);CHKERRQ(ierr);
3550 ierr = PetscFree(rowners);CHKERRQ(ierr);
3551 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3552 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3553
3554 PetscFunctionReturn(0);
3555 }
3556
3557 #undef __FUNCT__
3558 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3559 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3560 {
3561 PetscErrorCode ierr;
3562 IS iscol_local;
3563 PetscInt csize;
3564
3565 PetscFunctionBegin;
3566 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3567 if (call == MAT_REUSE_MATRIX) {
3568 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3569 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3570 } else {
3571 PetscInt cbs;
3572 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3573 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3574 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3575 }
3576 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3577 if (call == MAT_INITIAL_MATRIX) {
3578 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3579 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3580 }
3581 PetscFunctionReturn(0);
3582 }
3583
3584 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3585 #undef __FUNCT__
3586 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3587 /*
3588 Not great since it makes two copies of the submatrix: first a SeqAIJ copy
3589 locally, and then the final result by concatenating the local matrices.
3590 Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3591
3592 Note: This requires a sequential iscol with all indices.
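   The parallel column layout of the result is taken from csize (or deduced
   when csize is PETSC_DECIDE); see the MPI_Scan() over nlocal below.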
3593 */ 3594 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3595 { 3596 PetscErrorCode ierr; 3597 PetscMPIInt rank,size; 3598 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3599 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3600 PetscBool allcolumns, colflag; 3601 Mat M,Mreuse; 3602 MatScalar *vwork,*aa; 3603 MPI_Comm comm = ((PetscObject)mat)->comm; 3604 Mat_SeqAIJ *aij; 3605 3606 3607 PetscFunctionBegin; 3608 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3609 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3610 3611 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3612 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3613 if (colflag && ncol == mat->cmap->N){ 3614 allcolumns = PETSC_TRUE; 3615 } else { 3616 allcolumns = PETSC_FALSE; 3617 } 3618 if (call == MAT_REUSE_MATRIX) { 3619 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3620 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3621 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3622 } else { 3623 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3624 } 3625 3626 /* 3627 m - number of local rows 3628 n - number of columns (same on all processors) 3629 rstart - first row in new global matrix generated 3630 */ 3631 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3632 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3633 if (call == MAT_INITIAL_MATRIX) { 3634 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3635 ii = aij->i; 3636 jj = aij->j; 3637 3638 /* 3639 Determine the number of non-zeros in the diagonal and off-diagonal 3640 portions of the matrix in order to do correct preallocation 3641 */ 3642 3643 /* first get start and end of "diagonal" columns */ 3644 if (csize == PETSC_DECIDE) { 3645 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3646 if (mglobal == n) { /* square matrix */ 3647 nlocal = m; 3648 } else { 3649 nlocal = n/size + ((n % size) > rank); 3650 } 3651 } else { 3652 nlocal = csize; 3653 } 3654 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3655 rstart = rend - nlocal; 3656 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3657 3658 /* next, compute all the lengths */ 3659 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3660 olens = dlens + m; 3661 for (i=0; i<m; i++) { 3662 jend = ii[i+1] - ii[i]; 3663 olen = 0; 3664 dlen = 0; 3665 for (j=0; j<jend; j++) { 3666 if (*jj < rstart || *jj >= rend) olen++; 3667 else dlen++; 3668 jj++; 3669 } 3670 olens[i] = olen; 3671 dlens[i] = dlen; 3672 } 3673 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3674 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3675 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3676 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3677 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3678 ierr = PetscFree(dlens);CHKERRQ(ierr); 3679 } else { 3680 PetscInt ml,nl; 3681 3682 M = *newmat; 3683 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3684 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3685 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3686 /* 3687 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
3688 rather than the slower MatSetValues().
3689 */
3690 M->was_assembled = PETSC_TRUE;
3691 M->assembled = PETSC_FALSE;
3692 }
3693 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3694 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3695 ii = aij->i;
3696 jj = aij->j;
3697 aa = aij->a;
3698 for (i=0; i<m; i++) {
3699 row = rstart + i;
3700 nz = ii[i+1] - ii[i];
3701 cwork = jj; jj += nz;
3702 vwork = aa; aa += nz;
3703 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3704 }
3705
3706 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3707 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3708 *newmat = M;
3709
3710 /* save submatrix used in processor for next request */
3711 if (call == MAT_INITIAL_MATRIX) {
3712 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3713 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3714 }
3715
3716 PetscFunctionReturn(0);
3717 }
3718
3719 EXTERN_C_BEGIN
3720 #undef __FUNCT__
3721 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3722 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3723 {
3724 PetscInt m,cstart, cend,j,nnz,i,d;
3725 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3726 const PetscInt *JJ;
3727 PetscScalar *values;
3728 PetscErrorCode ierr;
3729
3730 PetscFunctionBegin;
3731 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3732
3733 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3734 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3735 m = B->rmap->n;
3736 cstart = B->cmap->rstart;
3737 cend = B->cmap->rend;
3738 rstart = B->rmap->rstart;
3739
3740 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3741
3742 #if defined(PETSC_USE_DEBUG)
3743 for (i=0; i<m; i++) {
3744 nnz = Ii[i+1]- Ii[i];
3745 JJ = J + Ii[i];
3746 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3747 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3748 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3749 }
3750 #endif
3751
3752 for (i=0; i<m; i++) {
3753 nnz = Ii[i+1]- Ii[i];
3754 JJ = J + Ii[i];
3755 nnz_max = PetscMax(nnz_max,nnz);
3756 d = 0;
3757 for (j=0; j<nnz; j++) {
3758 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3759 }
3760 d_nnz[i] = d;
3761 o_nnz[i] = nnz - d;
3762 }
3763 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3764 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3765
3766 if (v) values = (PetscScalar*)v;
3767 else {
3768 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3769 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3770 }
3771
3772 for (i=0; i<m; i++) {
3773 ii = i + rstart;
3774 nnz = Ii[i+1]- Ii[i];
3775 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3776 }
3777 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3778 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3779
3780 if (!v) {
3781 ierr = PetscFree(values);CHKERRQ(ierr);
3782 }
3783 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3784 PetscFunctionReturn(0);
3785 }
3786 EXTERN_C_END
3787
3788 #undef __FUNCT__
3789 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3790 /*@
3791 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3792 (the default parallel PETSc format).
3793
3794 Collective on MPI_Comm
3795
3796 Input Parameters:
3797 + B - the matrix
3798 . i - the indices into j for the start of each local row (starts with zero)
3799 . j - the column indices for each local row (starts with zero)
3800 - v - optional values in the matrix
3801
3802 Level: developer
3803
3804 Notes:
3805 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3806 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3807 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3808
3809 The i and j indices are 0-based, and the i indices are offsets into the local j array.
3810
3811 The format used for the sparse matrix input is equivalent to a
3812 row-major ordering, i.e., for the following matrix, the input data expected is
3813 as shown:
3814
3815 1 0 0
3816 2 0 3 P0
3817 -------
3818 4 5 6 P1
3819
3820 Process0 [P0]: rows_owned=[0,1]
3821 i = {0,1,3} [size = nrow+1 = 2+1]
3822 j = {0,0,2} [size = nz = 3]
3823 v = {1,2,3} [size = nz = 3]
3824
3825 Process1 [P1]: rows_owned=[2]
3826 i = {0,3} [size = nrow+1 = 1+1]
3827 j = {0,1,2} [size = nz = 3]
3828 v = {4,5,6} [size = nz = 3]
3829
3830 .keywords: matrix, aij, compressed row, sparse, parallel
3831
3832 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3833 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3834 @*/
3835 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3836 {
3837 PetscErrorCode ierr;
3838
3839 PetscFunctionBegin;
3840 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3841 PetscFunctionReturn(0);
3842 }
3843
3844 #undef __FUNCT__
3845 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3846 /*@C
3847 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3848 (the default parallel PETSc format). For good matrix assembly performance
3849 the user should preallocate the matrix storage by setting the parameters
3850 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3851 performance can be increased by more than a factor of 50.
3852
3853 Collective on MPI_Comm
3854
3855 Input Parameters:
3856 + A - the matrix
3857 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3858 (same value is used for all local rows)
3859 . d_nnz - array containing the number of nonzeros in the various rows of the
3860 DIAGONAL portion of the local submatrix (possibly different for each row)
3861 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3862 The size of this array is equal to the number of local rows, i.e., 'm'.
3863 For matrices that will be factored, you must leave room for (and set)
3864 the diagonal entry even if it is zero.
3865 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3866 submatrix (same value is used for all local rows).
3867 - o_nnz - array containing the number of nonzeros in the various rows of the
3868 OFF-DIAGONAL portion of the local submatrix (possibly different for
3869 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3870 structure. The size of this array is equal to the number
3871 of local rows, i.e., 'm'.
3872
3873 If the *_nnz parameter is given then the *_nz parameter is ignored
3874
3875 The AIJ format (also called the Yale sparse matrix format or
3876 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3877 storage. The stored row and column indices begin with zero.
3878 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3879
3880 The parallel matrix is partitioned such that the first m0 rows belong to
3881 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3882 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3883
3884 The DIAGONAL portion of the local submatrix of a processor can be defined
3885 as the submatrix which is obtained by extracting the part corresponding to
3886 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3887 first row that belongs to the processor, r2 is the last row belonging to
3888 this processor, and c1-c2 is the range of indices of the local part of a
3889 vector suitable for applying the matrix to. This is an m x n matrix. In the
3890 common case of a square matrix, the row and column ranges are the same and
3891 the DIAGONAL part is also square. The remaining portion of the local
3892 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3893
3894 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3895
3896 You can call MatGetInfo() to get information on how effective the preallocation was;
3897 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
3898 You can also run with the option -info and look for messages with the string
3899 malloc in them to see if additional memory allocation was needed.
3900
3901 Example usage:
3902
3903 Consider the following 8x8 matrix with 34 non-zero values, that is
3904 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3905 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3906 as follows:
3907
3908 .vb
3909 1 2 0 | 0 3 0 | 0 4
3910 Proc0 0 5 6 | 7 0 0 | 8 0
3911 9 0 10 | 11 0 0 | 12 0
3912 -------------------------------------
3913 13 0 14 | 15 16 17 | 0 0
3914 Proc1 0 18 0 | 19 20 21 | 0 0
3915 0 0 0 | 22 23 0 | 24 0
3916 -------------------------------------
3917 Proc2 25 26 27 | 0 0 28 | 29 0
3918 30 0 0 | 31 32 33 | 0 34
3919 .ve
3920
3921 This can be represented as a collection of submatrices as:
3922
3923 .vb
3924 A B C
3925 D E F
3926 G H I
3927 .ve
3928
3929 Where the submatrices A,B,C are owned by proc0, D,E,F are
3930 owned by proc1, G,H,I are owned by proc2.
3931
3932 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3933 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3934 The 'M','N' parameters are 8,8, and have the same values on all procs.
3935
3936 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3937 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices
3938 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3939 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3940 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3941 matrix and [DF] as another SeqAIJ matrix.
3942
3943 When the d_nz, o_nz parameters are specified, d_nz storage elements are
3944 allocated for every row of the local diagonal submatrix, and o_nz
3945 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3946 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3947 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3948 In this case, the values of d_nz,o_nz are:
3949 .vb
3950 proc0 : d_nz = 2, o_nz = 2
3951 proc1 : d_nz = 3, o_nz = 2
3952 proc2 : d_nz = 1, o_nz = 4
3953 .ve
3954 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3955 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3956 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3957 the 34 values.
3958
3959 When the d_nnz, o_nnz parameters are specified, the storage is specified
3960 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3961 In the above case the values for d_nnz,o_nnz are:
3962 .vb
3963 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3964 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3965 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3966 .ve
3967 Here the space allocated is the sum of all the above values, i.e., 34, and
3968 hence the preallocation is perfect.
3969
3970 Level: intermediate
3971
3972 .keywords: matrix, aij, compressed row, sparse, parallel
3973
3974 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3975 MPIAIJ, MatGetInfo()
3976 @*/
3977 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3978 {
3979 PetscErrorCode ierr;
3980
3981 PetscFunctionBegin;
3982 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3983 PetscValidType(B,1);
3984 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3985 PetscFunctionReturn(0);
3986 }
3987
3988 #undef __FUNCT__
3989 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3990 /*@
3991 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard
3992 CSR format the local rows.
3993
3994 Collective on MPI_Comm
3995
3996 Input Parameters:
3997 + comm - MPI communicator
3998 . m - number of local rows (Cannot be PETSC_DECIDE)
3999 . n - This value should be the same as the local size used in creating the
4000 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
4001 calculated if N is given) For square matrices n is almost always m.
4002 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4003 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4004 . i - row indices
4005 . j - column indices
4006 - a - matrix values
4007
4008 Output Parameter:
4009 . mat - the matrix
4010
4011 Level: intermediate
4012
4013 Notes:
4014 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4015 thus you CANNOT change the matrix entries by changing the values of a[] after you have
4016 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
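A minimal call sketch (a sketch only, assuming each process owns m local rows
of a global M-by-N matrix and already holds its local CSR triplet i, j, a):
.vb
   Mat A;
   ierr = MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,M,N,i,j,a,&A);CHKERRQ(ierr);
   /* ... use A, then ... */
   ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve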
4017
4018 The i and j indices are 0-based, and the i indices are offsets into the local j array.
4019
4020 The format used for the sparse matrix input is equivalent to a
4021 row-major ordering, i.e., for the following matrix, the input data expected is
4022 as shown:
4023
4024 1 0 0
4025 2 0 3 P0
4026 -------
4027 4 5 6 P1
4028
4029 Process0 [P0]: rows_owned=[0,1]
4030 i = {0,1,3} [size = nrow+1 = 2+1]
4031 j = {0,0,2} [size = nz = 3]
4032 v = {1,2,3} [size = nz = 3]
4033
4034 Process1 [P1]: rows_owned=[2]
4035 i = {0,3} [size = nrow+1 = 1+1]
4036 j = {0,1,2} [size = nz = 3]
4037 v = {4,5,6} [size = nz = 3]
4038
4039 .keywords: matrix, aij, compressed row, sparse, parallel
4040
4041 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4042 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4043 @*/
4044 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4045 {
4046 PetscErrorCode ierr;
4047
4048 PetscFunctionBegin;
4049 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4050 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4051 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4052 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4053 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4054 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4055 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4056 PetscFunctionReturn(0);
4057 }
4058
4059 #undef __FUNCT__
4060 #define __FUNCT__ "MatCreateAIJ"
4061 /*@C
4062 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4063 (the default parallel PETSc format). For good matrix assembly performance
4064 the user should preallocate the matrix storage by setting the parameters
4065 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4066 performance can be increased by more than a factor of 50.
4067
4068 Collective on MPI_Comm
4069
4070 Input Parameters:
4071 + comm - MPI communicator
4072 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4073 This value should be the same as the local size used in creating the
4074 y vector for the matrix-vector product y = Ax.
4075 . n - This value should be the same as the local size used in creating the
4076 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
4077 calculated if N is given) For square matrices n is almost always m.
4078 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4079 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4080 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4081 (same value is used for all local rows)
4082 . d_nnz - array containing the number of nonzeros in the various rows of the
4083 DIAGONAL portion of the local submatrix (possibly different for each row)
4084 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4085 The size of this array is equal to the number of local rows, i.e., 'm'.
4086 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4087 submatrix (same value is used for all local rows).
4088 - o_nnz - array containing the number of nonzeros in the various rows of the
4089 OFF-DIAGONAL portion of the local submatrix (possibly different for
4090 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4091 structure. The size of this array is equal to the number
4092 of local rows, i.e., 'm'.
4093
4094 Output Parameter:
4095 . A - the matrix
4096
4097 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4098 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4099 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4100
4101 Notes:
4102 If the *_nnz parameter is given then the *_nz parameter is ignored
4103
4104 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4105 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4106 storage requirements for this matrix.
4107
4108 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4109 processor then it must be used on all processors that share the object for
4110 that argument.
4111
4112 The user MUST specify either the local or global matrix dimensions
4113 (possibly both).
4114
4115 The parallel matrix is partitioned across processors such that the
4116 first m0 rows belong to process 0, the next m1 rows belong to
4117 process 1, the next m2 rows belong to process 2, etc., where
4118 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4119 values corresponding to an [m x N] submatrix.
4120
4121 The columns are logically partitioned with the n0 columns belonging
4122 to the 0th partition, the next n1 columns belonging to the next
4123 partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4124
4125 The DIAGONAL portion of the local submatrix on any given processor
4126 is the submatrix corresponding to the rows and columns m,n
4127 corresponding to the given processor, i.e., the diagonal matrix on
4128 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4129 etc. The remaining portion of the local submatrix [m x (N-n)]
4130 constitutes the OFF-DIAGONAL portion. The example below better
4131 illustrates this concept.
4132
4133 For a square global matrix we define each processor's diagonal portion
4134 to be its local rows and the corresponding columns (a square submatrix);
4135 each processor's off-diagonal portion encompasses the remainder of the
4136 local matrix (a rectangular submatrix).
4137
4138 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4139
4140 When calling this routine with a single process communicator, a matrix of
4141 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4142 type of communicator, use the construction mechanism:
4143 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4144
4145 By default, this format uses inodes (identical nodes) when possible.
4146 We search for consecutive rows with the same nonzero structure, thereby
4147 reusing matrix information to achieve increased efficiency.
4148
4149 Options Database Keys:
4150 + -mat_no_inode - Do not use inodes
4151 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4152 - -mat_aij_oneindex - Internally use indexing starting at 1
4153 rather than 0. Note that when calling MatSetValues(),
4154 the user still MUST index entries starting at 0!
4155
4156
4157 Example usage:
4158
4159 Consider the following 8x8 matrix with 34 non-zero values, that is
4160 assembled across 3 processors.
Let's assume that proc0 owns 3 rows,
4161 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4162 as follows:
4163
4164 .vb
4165 1 2 0 | 0 3 0 | 0 4
4166 Proc0 0 5 6 | 7 0 0 | 8 0
4167 9 0 10 | 11 0 0 | 12 0
4168 -------------------------------------
4169 13 0 14 | 15 16 17 | 0 0
4170 Proc1 0 18 0 | 19 20 21 | 0 0
4171 0 0 0 | 22 23 0 | 24 0
4172 -------------------------------------
4173 Proc2 25 26 27 | 0 0 28 | 29 0
4174 30 0 0 | 31 32 33 | 0 34
4175 .ve
4176
4177 This can be represented as a collection of submatrices as:
4178
4179 .vb
4180 A B C
4181 D E F
4182 G H I
4183 .ve
4184
4185 Where the submatrices A,B,C are owned by proc0, D,E,F are
4186 owned by proc1, G,H,I are owned by proc2.
4187
4188 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4189 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4190 The 'M','N' parameters are 8,8, and have the same values on all procs.
4191
4192 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4193 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4194 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4195 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4196 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
4197 matrix and [DF] as another SeqAIJ matrix.
4198
4199 When the d_nz, o_nz parameters are specified, d_nz storage elements are
4200 allocated for every row of the local diagonal submatrix, and o_nz
4201 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4202 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4203 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4204 In this case, the values of d_nz,o_nz are:
4205 .vb
4206 proc0 : d_nz = 2, o_nz = 2
4207 proc1 : d_nz = 3, o_nz = 2
4208 proc2 : d_nz = 1, o_nz = 4
4209 .ve
4210 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4211 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4212 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4213 the 34 values.
4214
4215 When the d_nnz, o_nnz parameters are specified, the storage is specified
4216 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4217 In the above case the values for d_nnz,o_nnz are:
4218 .vb
4219 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4220 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4221 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4222 .ve
4223 Here the space allocated is the sum of all the above values, i.e., 34, and
4224 hence the preallocation is perfect.
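As a sketch, proc0 in the example above could create its part of the matrix
with the exact per-row counts listed above (the 0 passed for d_nz and o_nz is
ignored because d_nnz and o_nnz are given):
.vb
   Mat      A;
   PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
   ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve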
4225 4226 Level: intermediate 4227 4228 .keywords: matrix, aij, compressed row, sparse, parallel 4229 4230 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4231 MPIAIJ, MatCreateMPIAIJWithArrays() 4232 @*/ 4233 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4234 { 4235 PetscErrorCode ierr; 4236 PetscMPIInt size; 4237 4238 PetscFunctionBegin; 4239 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4240 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4241 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4242 if (size > 1) { 4243 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4244 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4245 } else { 4246 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4247 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4248 } 4249 PetscFunctionReturn(0); 4250 } 4251 4252 #undef __FUNCT__ 4253 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4254 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4255 { 4256 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4257 4258 PetscFunctionBegin; 4259 *Ad = a->A; 4260 *Ao = a->B; 4261 *colmap = a->garray; 4262 PetscFunctionReturn(0); 4263 } 4264 4265 #undef __FUNCT__ 4266 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4267 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4268 { 4269 PetscErrorCode ierr; 4270 PetscInt i; 4271 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4272 4273 PetscFunctionBegin; 4274 if (coloring->ctype == IS_COLORING_GLOBAL) { 4275 ISColoringValue *allcolors,*colors; 4276 ISColoring ocoloring; 4277 4278 /* set coloring for diagonal portion */ 4279 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4280 4281 /* set coloring for off-diagonal portion */ 4282 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4283 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4284 for (i=0; i<a->B->cmap->n; i++) { 4285 colors[i] = allcolors[a->garray[i]]; 4286 } 4287 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4288 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4289 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4290 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4291 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4292 ISColoringValue *colors; 4293 PetscInt *larray; 4294 ISColoring ocoloring; 4295 4296 /* set coloring for diagonal portion */ 4297 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4298 for (i=0; i<a->A->cmap->n; i++) { 4299 larray[i] = i + A->cmap->rstart; 4300 } 4301 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4302 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4303 for (i=0; i<a->A->cmap->n; i++) { 4304 colors[i] = coloring->colors[larray[i]]; 4305 } 4306 ierr = PetscFree(larray);CHKERRQ(ierr); 4307 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4308 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4309 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4310 4311 /* set coloring for off-diagonal portion */ 4312 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4313 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4314 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4315 for (i=0; i<a->B->cmap->n; i++) {
4316 colors[i] = coloring->colors[larray[i]];
4317 }
4318 ierr = PetscFree(larray);CHKERRQ(ierr);
4319 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4320 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4321 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4322 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4323
4324 PetscFunctionReturn(0);
4325 }
4326
4327 #if defined(PETSC_HAVE_ADIC)
4328 #undef __FUNCT__
4329 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4330 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4331 {
4332 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4333 PetscErrorCode ierr;
4334
4335 PetscFunctionBegin;
4336 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4337 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4338 PetscFunctionReturn(0);
4339 }
4340 #endif
4341
4342 #undef __FUNCT__
4343 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4344 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4345 {
4346 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4347 PetscErrorCode ierr;
4348
4349 PetscFunctionBegin;
4350 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4351 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4352 PetscFunctionReturn(0);
4353 }
4354
4355 #undef __FUNCT__
4356 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4357 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4358 {
4359 PetscErrorCode ierr;
4360 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4361 PetscInt *indx;
4362
4363 PetscFunctionBegin;
4364 /* This routine will ONLY return an MPIAIJ type matrix */
4365 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4366 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4367 if (n == PETSC_DECIDE){
4368 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4369 }
4370 /* Check sum(n) = N */
4371 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4372 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4373
4374 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4375 rstart -= m;
4376
4377 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4378 for (i=0;i<m;i++) {
4379 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4380 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4381 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4382 }
4383
4384 ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4385 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4386 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4387 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr);
4388 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4389 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4390 PetscFunctionReturn(0);
4391 }
4392
4393 #undef __FUNCT__
4394 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4395 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4396 {
4397 PetscErrorCode ierr;
4398 PetscInt
m,N,i,rstart,nnz,Ii;
4399 PetscInt *indx;
4400 PetscScalar *values;
4401
4402 PetscFunctionBegin;
4403 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4404 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4405 for (i=0;i<m;i++) {
4406 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4407 Ii = i + rstart;
4408 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4409 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4410 }
4411 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4412 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4413 PetscFunctionReturn(0);
4414 }
4415
4416 #undef __FUNCT__
4417 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4418 /*@
4419 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating the sequential
4420 matrices from each processor
4421
4422 Collective on MPI_Comm
4423
4424 Input Parameters:
4425 + comm - the communicator the parallel matrix will live on
4426 . inmat - the input sequential matrix (one on each process)
4427 . n - number of local columns (or PETSC_DECIDE)
4428 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4429
4430 Output Parameter:
4431 . outmat - the parallel matrix generated
4432
4433 Level: advanced
4434
4435 Notes: The number of columns of the matrix on EACH processor MUST be the same.
4436
4437 @*/
4438 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4439 {
4440 PetscErrorCode ierr;
4441
4442 PetscFunctionBegin;
4443 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4444 if (scall == MAT_INITIAL_MATRIX){
4445 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4446 }
4447 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4448 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4449 PetscFunctionReturn(0);
4450 }
4451
4452 #undef __FUNCT__
4453 #define __FUNCT__ "MatFileSplit"
4454 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4455 {
4456 PetscErrorCode ierr;
4457 PetscMPIInt rank;
4458 PetscInt m,N,i,rstart,nnz;
4459 size_t len;
4460 const PetscInt *indx;
4461 PetscViewer out;
4462 char *name;
4463 Mat B;
4464 const PetscScalar *values;
4465
4466 PetscFunctionBegin;
4467 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4468 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4469 /* Should this be the type of the diagonal block of A?
*/
4470 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4471 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4472 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4473 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4474 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4475 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4476 for (i=0;i<m;i++) {
4477 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4478 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4479 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4480 }
4481 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4482 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4483
4484 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4485 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4486 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4487 sprintf(name,"%s.%d",outfile,rank);
4488 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4489 ierr = PetscFree(name);CHKERRQ(ierr);
4490 ierr = MatView(B,out);CHKERRQ(ierr);
4491 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4492 ierr = MatDestroy(&B);CHKERRQ(ierr);
4493 PetscFunctionReturn(0);
4494 }
4495
4496 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4497 #undef __FUNCT__
4498 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4499 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4500 {
4501 PetscErrorCode ierr;
4502 Mat_Merge_SeqsToMPI *merge;
4503 PetscContainer container;
4504
4505 PetscFunctionBegin;
4506 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4507 if (container) {
4508 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4509 ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4510 ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4511 ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4512 ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4513 ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4514 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4515 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4516 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4517 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4518 ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4519 ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4520 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4521 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4522 ierr = PetscFree(merge);CHKERRQ(ierr);
4523 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4524 }
4525 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4526 PetscFunctionReturn(0);
4527 }
4528
4529 #include <../src/mat/utils/freespace.h>
4530 #include <petscbt.h>
4531
4532 #undef __FUNCT__
4533 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4534 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4535 {
4536 PetscErrorCode ierr;
4537 MPI_Comm comm=((PetscObject)mpimat)->comm;
4538 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4539 PetscMPIInt size,rank,taga,*len_s;
4540 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4541 PetscInt proc,m;
4542 PetscInt **buf_ri,**buf_rj;
4543 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4544 PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4545 MPI_Request *s_waits,*r_waits;
4546 MPI_Status *status;
4547 MatScalar *aa=a->a;
4548 MatScalar **abuf_r,*ba_i;
4549 Mat_Merge_SeqsToMPI *merge;
4550 PetscContainer container;
4551
4552 PetscFunctionBegin;
4553 ierr =
4553   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4554 
4555   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4556   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4557 
4558   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4559   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4560 
4561   bi     = merge->bi;
4562   bj     = merge->bj;
4563   buf_ri = merge->buf_ri;
4564   buf_rj = merge->buf_rj;
4565 
4566   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4567   owners = merge->rowmap->range;
4568   len_s  = merge->len_s;
4569 
4570   /* send and recv matrix values */
4571   /*-----------------------------*/
4572   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4573   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4574 
4575   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4576   for (proc=0,k=0; proc<size; proc++){
4577     if (!len_s[proc]) continue;
4578     i    = owners[proc];
4579     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4580     k++;
4581   }
4582 
4583   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4584   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4585   ierr = PetscFree(status);CHKERRQ(ierr);
4586 
4587   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4588   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4589 
4590   /* insert mat values of mpimat */
4591   /*----------------------------*/
4592   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4593   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4594 
4595   for (k=0; k<merge->nrecv; k++){
4596     buf_ri_k[k] = buf_ri[k];                    /* beginning of k-th received i-structure */
4597     nrows       = *(buf_ri_k[k]);
4598     nextrow[k]  = buf_ri_k[k]+1;                /* next row number of k-th received i-structure */
4599     nextai[k]   = buf_ri_k[k] + (nrows + 1);    /* points to the next i-structure of k-th received i-structure */
4600   }
4601 
4602   /* set values of ba */
4603   m = merge->rowmap->n;
4604   for (i=0; i<m; i++) {
4605     arow = owners[rank] + i;
4606     bj_i = bj+bi[i];            /* col indices of the i-th row of mpimat */
4607     bnzi = bi[i+1] - bi[i];
4608     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4609 
4610     /* add local non-zero vals of this proc's seqmat into ba */
4611     anzi   = ai[arow+1] - ai[arow];
4612     aj     = a->j + ai[arow];
4613     aa     = a->a + ai[arow];
4614     nextaj = 0;
4615     for (j=0; nextaj<anzi; j++){
4616       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4617         ba_i[j] += aa[nextaj++];
4618       }
4619     }
4620 
4621     /* add received vals into ba */
4622     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4623       /* i-th row */
4624       if (i == *nextrow[k]) {
4625         anzi   = *(nextai[k]+1) - *nextai[k];
4626         aj     = buf_rj[k] + *(nextai[k]);
4627         aa     = abuf_r[k] + *(nextai[k]);
4628         nextaj = 0;
4629         for (j=0; nextaj<anzi; j++){
4630           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4631             ba_i[j] += aa[nextaj++];
4632           }
4633         }
4634         nextrow[k]++; nextai[k]++;
4635       }
4636     }
4637     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4638   }
4639   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4640   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4641 
4642   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4643   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4644   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4645   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4646   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4647   PetscFunctionReturn(0);
4648 }
4649 
4650 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4651 
4652 #undef __FUNCT__
4653 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4654 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4655 {
4656   PetscErrorCode      ierr;
4657   Mat                 B_mpi;
4658   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4659   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4660   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4661   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4662   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4663   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4664   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4665   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4666   MPI_Status          *status;
4667   PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
4668   PetscBT             lnkbt;
4669   Mat_Merge_SeqsToMPI *merge;
4670   PetscContainer      container;
4671 
4672   PetscFunctionBegin;
4673   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4674 
4675   /* make sure it is a PETSc comm */
4676   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4677   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4678   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4679 
4680   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4681   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4682 
4683   /* determine row ownership */
4684   /*---------------------------------------------------------*/
4685   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4686   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4687   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4688   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4689   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4690   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4691   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4692 
4693   m      = merge->rowmap->n;
4694   M      = merge->rowmap->N;
4695   owners = merge->rowmap->range;
4696 
4697   /* determine the number of messages to send, their lengths */
4698   /*---------------------------------------------------------*/
4699   len_s = merge->len_s;
4700 
4701   len          = 0; /* length of buf_si[] */
4702   merge->nsend = 0;
4703   for (proc=0; proc<size; proc++){
4704     len_si[proc] = 0;
4705     if (proc == rank){
4706       len_s[proc] = 0;
4707     } else {
4708       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4709       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
4710     }
4711     if (len_s[proc]) {
4712       merge->nsend++;
4713       nrows = 0;
4714       for (i=owners[proc]; i<owners[proc+1]; i++){
4715         if (ai[i+1] > ai[i]) nrows++;
4716       }
4717       len_si[proc] = 2*(nrows+1);
4718       len += len_si[proc];
4719     }
4720   }
4721 
4722   /* determine the number and length of messages to receive for ij-structure */
4723   /*-------------------------------------------------------------------------*/
4724   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4725   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4726 
4727   /* post the Irecv of j-structure */
4728   /*-------------------------------*/
4729   ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4730 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4731 4732 /* post the Isend of j-structure */ 4733 /*--------------------------------*/ 4734 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4735 4736 for (proc=0, k=0; proc<size; proc++){ 4737 if (!len_s[proc]) continue; 4738 i = owners[proc]; 4739 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4740 k++; 4741 } 4742 4743 /* receives and sends of j-structure are complete */ 4744 /*------------------------------------------------*/ 4745 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4746 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4747 4748 /* send and recv i-structure */ 4749 /*---------------------------*/ 4750 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4751 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4752 4753 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4754 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4755 for (proc=0,k=0; proc<size; proc++){ 4756 if (!len_s[proc]) continue; 4757 /* form outgoing message for i-structure: 4758 buf_si[0]: nrows to be sent 4759 [1:nrows]: row index (global) 4760 [nrows+1:2*nrows+1]: i-structure index 4761 */ 4762 /*-------------------------------------------*/ 4763 nrows = len_si[proc]/2 - 1; 4764 buf_si_i = buf_si + nrows+1; 4765 buf_si[0] = nrows; 4766 buf_si_i[0] = 0; 4767 nrows = 0; 4768 for (i=owners[proc]; i<owners[proc+1]; i++){ 4769 anzi = ai[i+1] - ai[i]; 4770 if (anzi) { 4771 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4772 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4773 nrows++; 4774 } 4775 } 4776 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4777 k++; 4778 buf_si += len_si[proc]; 4779 } 4780 4781 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4782 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4783 4784 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4785 for (i=0; i<merge->nrecv; i++){ 4786 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4787 } 4788 4789 ierr = PetscFree(len_si);CHKERRQ(ierr); 4790 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4791 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4792 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4793 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4794 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4795 ierr = PetscFree(status);CHKERRQ(ierr); 4796 4797 /* compute a local seq matrix in each processor */ 4798 /*----------------------------------------------*/ 4799 /* allocate bi array and free space for accumulating nonzero column info */ 4800 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4801 bi[0] = 0; 4802 4803 /* create and initialize a linked list */ 4804 nlnk = N+1; 4805 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4806 4807 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4808 len = 0; 4809 len = ai[owners[rank+1]] - ai[owners[rank]]; 4810 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4811 current_space = free_space; 4812 4813 /* determine 
symbolic info for each local row */
4814   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4815 
4816   for (k=0; k<merge->nrecv; k++){
4817     buf_ri_k[k] = buf_ri[k];                  /* beginning of k-th received i-structure */
4818     nrows       = *buf_ri_k[k];
4819     nextrow[k]  = buf_ri_k[k] + 1;            /* next row number of k-th received i-structure */
4820     nextai[k]   = buf_ri_k[k] + (nrows + 1);  /* points to the next i-structure of k-th received i-structure */
4821   }
4822 
4823   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4824   len  = 0;
4825   for (i=0; i<m; i++) {
4826     bnzi = 0;
4827     /* add local non-zero cols of this proc's seqmat into lnk */
4828     arow = owners[rank] + i;
4829     anzi = ai[arow+1] - ai[arow];
4830     aj   = a->j + ai[arow];
4831     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4832     bnzi += nlnk;
4833     /* add received col data into lnk */
4834     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4835       if (i == *nextrow[k]) { /* i-th row */
4836         anzi = *(nextai[k]+1) - *nextai[k];
4837         aj   = buf_rj[k] + *nextai[k];
4838         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4839         bnzi += nlnk;
4840         nextrow[k]++; nextai[k]++;
4841       }
4842     }
4843     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4844 
4845     /* if free space is not available, make more free space */
4846     if (current_space->local_remaining<bnzi) {
4847       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4848       nspacedouble++;
4849     }
4850     /* copy data into free space, then initialize lnk */
4851     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4852     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4853 
4854     current_space->array           += bnzi;
4855     current_space->local_used      += bnzi;
4856     current_space->local_remaining -= bnzi;
4857 
4858     bi[i+1] = bi[i] + bnzi;
4859   }
4860 
4861   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4862 
4863   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4864   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4865   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4866 
4867   /* create symbolic parallel matrix B_mpi */
4868   /*---------------------------------------*/
4869   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4870   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4871   if (n==PETSC_DECIDE) {
4872     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4873   } else {
4874     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4875   }
4876   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4877   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4878   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4879   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4880   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4881 
4882   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4883   B_mpi->assembled    = PETSC_FALSE;
4884   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4885   merge->bi           = bi;
4886   merge->bj           = bj;
4887   merge->buf_ri       = buf_ri;
4888   merge->buf_rj       = buf_rj;
4889   merge->coi          = PETSC_NULL;
4890   merge->coj          = PETSC_NULL;
4891   merge->owners_co    = PETSC_NULL;
4892 
4893   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4894 
4895   /* attach the supporting struct to B_mpi for reuse */
4896   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4897   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4898   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4899   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4900   *mpimat = B_mpi;
4901 
4902   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4903   PetscFunctionReturn(0);
4904 }
4905 
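/*
   Editor's worked example of the i-structure message format assembled by
   MatCreateMPIAIJSumSeqAIJSymbolic() above (derived directly from the code;
   the concrete numbers are illustrative). Suppose this process sends [proc]
   two nonempty rows: local row 3 with 2 nonzeros and local row 7 with 5
   nonzeros. Then nrows = 2, len_si[proc] = 2*(nrows+1) = 6, and buf_si is

      buf_si[0] = 2    nrows being sent
      buf_si[1] = 3    local index of the first nonempty row
      buf_si[2] = 7    local index of the second nonempty row
      buf_si[3] = 0    i-structure: offset of row 3's data in the j/a messages
      buf_si[4] = 2    offset of row 7's data
      buf_si[5] = 7    total number of nonzeros sent

   The matching j- and a-messages then carry the 7 column indices and values.
*/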
4906 #undef __FUNCT__
4907 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4908 /*@C
4909       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together sequential
4910                  matrices from each processor
4911 
4912    Collective on MPI_Comm
4913 
4914    Input Parameters:
4915 +    comm - the communicator the parallel matrix will live on
4916 .    seqmat - the input sequential matrix (one per process)
4917 .    m - number of local rows (or PETSC_DECIDE)
4918 .    n - number of local columns (or PETSC_DECIDE)
4919 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4920 
4921    Output Parameter:
4922 .    mpimat - the parallel matrix generated
4923 
4924     Level: advanced
4925 
4926    Notes:
4927      The dimensions of the sequential matrix on each process MUST be the same.
4928      The input seqmat is stored inside the container "Mat_Merge_SeqsToMPI", and will be
4929      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4930 @*/
4931 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4932 {
4933   PetscErrorCode ierr;
4934   PetscMPIInt    size;
4935 
4936   PetscFunctionBegin;
4937   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4938   if (size == 1){
4939     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4940     if (scall == MAT_INITIAL_MATRIX){
4941       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4942     } else {
4943       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4944     }
4945     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4946     PetscFunctionReturn(0);
4947   }
4948   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4949   if (scall == MAT_INITIAL_MATRIX){
4950     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4951   }
4952   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4953   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4954   PetscFunctionReturn(0);
4955 }
4956 
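/*
   Editor's note - a usage sketch (not compiled) for MatCreateMPIAIJSumSeqAIJ()
   above, e.g. when every process redundantly computes a sequential contribution
   of the full global size M x N. "seqpart" and "Asum" are illustrative names.

      Mat seqpart,Asum;
      ... each process assembles its M x N SeqAIJ matrix seqpart ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqpart,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asum);CHKERRQ(ierr);
      if the nonzero pattern is unchanged, the symbolic data attached to Asum is reused:
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqpart,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Asum);CHKERRQ(ierr);
*/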
4957 #undef __FUNCT__
4958 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4959 /*@
4960      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows
4961            and putting them into a sequential matrix with mlocal rows and n columns, where mlocal is the row count
4962            obtained with MatGetLocalSize() and n is the global column count obtained with MatGetSize()
4963 
4964     Not Collective
4965 
4966    Input Parameters:
4967 +    A - the matrix
4968 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4969 
4970    Output Parameter:
4971 .    A_loc - the local sequential matrix generated
4972 
4973     Level: developer
4974 
4975 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4976 
4977 @*/
4978 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4979 {
4980   PetscErrorCode ierr;
4981   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4982   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4983   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4984   MatScalar      *aa=a->a,*ba=b->a,*cam;
4985   PetscScalar    *ca;
4986   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4987   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4988   PetscBool      match;
4989 
4990   PetscFunctionBegin;
4991   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4992   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4993   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4994   if (scall == MAT_INITIAL_MATRIX){
4995     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4996     ci[0] = 0;
4997     for (i=0; i<am; i++){
4998       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4999     }
5000     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5001     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5002     k = 0;
5003     for (i=0; i<am; i++) {
5004       ncols_o = bi[i+1] - bi[i];
5005       ncols_d = ai[i+1] - ai[i];
5006       /* off-diagonal portion of A */
5007       for (jo=0; jo<ncols_o; jo++) {
5008         col = cmap[*bj];
5009         if (col >= cstart) break;
5010         cj[k] = col; bj++;
5011         ca[k++] = *ba++;
5012       }
5013       /* diagonal portion of A */
5014       for (j=0; j<ncols_d; j++) {
5015         cj[k] = cstart + *aj++;
5016         ca[k++] = *aa++;
5017       }
5018       /* off-diagonal portion of A */
5019       for (j=jo; j<ncols_o; j++) {
5020         cj[k] = cmap[*bj++];
5021         ca[k++] = *ba++;
5022       }
5023     }
5024     /* put together the new matrix */
5025     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5026     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5027     /* Since these are PETSc arrays, change flags to free them as necessary. */
5028     mat = (Mat_SeqAIJ*)(*A_loc)->data;
5029     mat->free_a  = PETSC_TRUE;
5030     mat->free_ij = PETSC_TRUE;
5031     mat->nonew   = 0;
5032   } else if (scall == MAT_REUSE_MATRIX){
5033     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5034     ci = mat->i; cj = mat->j; cam = mat->a;
5035     for (i=0; i<am; i++) {
5036       /* off-diagonal portion of A */
5037       ncols_o = bi[i+1] - bi[i];
5038       for (jo=0; jo<ncols_o; jo++) {
5039         col = cmap[*bj];
5040         if (col >= cstart) break;
5041         *cam++ = *ba++; bj++;
5042       }
5043       /* diagonal portion of A */
5044       ncols_d = ai[i+1] - ai[i];
5045       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5046       /* off-diagonal portion of A */
5047       for (j=jo; j<ncols_o; j++) {
5048         *cam++ = *ba++; bj++;
5049       }
5050     }
5051   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5052   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5053   PetscFunctionReturn(0);
5054 }
5055 
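/*
   Editor's note - usage sketch (not compiled) for MatMPIAIJGetLocalMat() above;
   A is assumed to be an assembled MPIAIJ matrix.

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... A_loc is a SeqAIJ matrix with mlocal rows and N (global) columns ...
      after A's values change (same nonzero pattern), refresh the copy:
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/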
5056 #undef __FUNCT__
5057 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5058 /*@C
5059      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5060 
5061     Not Collective
5062 
5063    Input Parameters:
5064 +    A - the matrix
5065 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5066 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5067 
5068    Output Parameter:
5069 .    A_loc - the local sequential matrix generated
5070 
5071     Level: developer
5072 
5073 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5074 
5075 @*/
5076 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5077 {
5078   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5079   PetscErrorCode ierr;
5080   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5081   IS             isrowa,iscola;
5082   Mat            *aloc;
5083   PetscBool      match;
5084 
5085   PetscFunctionBegin;
5086   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5087   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5088   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5089   if (!row){
5090     start = A->rmap->rstart; end = A->rmap->rend;
5091     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5092   } else {
5093     isrowa = *row;
5094   }
5095   if (!col){
5096     start = A->cmap->rstart;
5097     cmap  = a->garray;
5098     nzA   = a->A->cmap->n;
5099     nzB   = a->B->cmap->n;
5100     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5101     ncols = 0;
5102     for (i=0; i<nzB; i++) {
5103       if (cmap[i] < start) idx[ncols++] = cmap[i];
5104       else break;
5105     }
5106     imark = i;
5107     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5108     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5109     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5110   } else {
5111     iscola = *col;
5112   }
5113   if (scall != MAT_INITIAL_MATRIX){
5114     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5115     aloc[0] = *A_loc;
5116   }
5117   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5118   *A_loc = aloc[0];
5119   ierr = PetscFree(aloc);CHKERRQ(ierr);
5120   if (!row){
5121     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5122   }
5123   if (!col){
5124     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5125   }
5126   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5127   PetscFunctionReturn(0);
5128 }
5129 
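/*
   Editor's note - usage sketch (not compiled) for MatMPIAIJGetLocalMatCondensed()
   above; passing PETSC_NULL for row and col keeps all local rows and only the
   columns in which the local part of A has nonzeros.

      Mat A_cond;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_cond);CHKERRQ(ierr);
      ... A_cond is a SeqAIJ matrix over the kept (nonzero) columns only ...
      ierr = MatDestroy(&A_cond);CHKERRQ(ierr);
*/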
5130 #undef __FUNCT__
5131 #define __FUNCT__ "MatGetBrowsOfAcols"
5132 /*@C
5133      MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5134 
5135     Collective on Mat
5136 
5137    Input Parameters:
5138 +    A,B - the matrices in mpiaij format
5139 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5140 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5141 
5142    Output Parameter:
5143 +    rowb, colb - index sets of rows and columns of B to extract
5144 -    B_seq - the sequential matrix generated
5145 
5146     Level: developer
5147 
5148 @*/
5149 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5150 {
5151   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5152   PetscErrorCode ierr;
5153   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5154   IS             isrowb,iscolb;
5155   Mat            *bseq=PETSC_NULL;
5156 
5157   PetscFunctionBegin;
5158   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5159     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5160   }
5161   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5162 
5163   if (scall == MAT_INITIAL_MATRIX){
5164     start = A->cmap->rstart;
5165     cmap  = a->garray;
5166     nzA   = a->A->cmap->n;
5167     nzB   = a->B->cmap->n;
5168     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5169     ncols = 0;
5170     for (i=0; i<nzB; i++) {  /* row < local row index */
5171       if (cmap[i] < start) idx[ncols++] = cmap[i];
5172       else break;
5173     }
5174     imark = i;
5175     for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
5176     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5177     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5178     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5179   } else {
5180     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5181     isrowb = *rowb; iscolb = *colb;
5182     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5183     bseq[0] = *B_seq;
5184   }
5185   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5186   *B_seq = bseq[0];
5187   ierr = PetscFree(bseq);CHKERRQ(ierr);
5188   if (!rowb){
5189     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5190   } else {
5191     *rowb = isrowb;
5192   }
5193   if (!colb){
5194     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5195   } else {
5196     *colb = iscolb;
5197   }
5198   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5199   PetscFunctionReturn(0);
5200 }
5201 
5202 #undef __FUNCT__
5203 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5204 /*
5205      MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5206            of the OFF-DIAGONAL portion of local A
5207 
5208     Collective on Mat
5209 
5210    Input Parameters:
5211 +    A,B - the matrices in mpiaij format
5212 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5213 
5214    Output Parameter:
5215 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5216 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5217 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5218 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5219 5220 Level: developer 5221 5222 */ 5223 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5224 { 5225 VecScatter_MPI_General *gen_to,*gen_from; 5226 PetscErrorCode ierr; 5227 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5228 Mat_SeqAIJ *b_oth; 5229 VecScatter ctx=a->Mvctx; 5230 MPI_Comm comm=((PetscObject)ctx)->comm; 5231 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5232 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5233 PetscScalar *rvalues,*svalues; 5234 MatScalar *b_otha,*bufa,*bufA; 5235 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5236 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5237 MPI_Status *sstatus,rstatus; 5238 PetscMPIInt jj; 5239 PetscInt *cols,sbs,rbs; 5240 PetscScalar *vals; 5241 5242 PetscFunctionBegin; 5243 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5244 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5245 } 5246 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5247 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5248 5249 gen_to = (VecScatter_MPI_General*)ctx->todata; 5250 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5251 rvalues = gen_from->values; /* holds the length of receiving row */ 5252 svalues = gen_to->values; /* holds the length of sending row */ 5253 nrecvs = gen_from->n; 5254 nsends = gen_to->n; 5255 5256 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5257 srow = gen_to->indices; /* local row index to be sent */ 5258 sstarts = gen_to->starts; 5259 sprocs = gen_to->procs; 5260 sstatus = gen_to->sstatus; 5261 sbs = gen_to->bs; 5262 rstarts = gen_from->starts; 5263 rprocs = gen_from->procs; 5264 rbs = gen_from->bs; 5265 5266 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5267 if (scall == MAT_INITIAL_MATRIX){ 5268 /* i-array */ 5269 /*---------*/ 5270 /* post receives */ 5271 for (i=0; i<nrecvs; i++){ 5272 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5273 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5274 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5275 } 5276 5277 /* pack the outgoing message */ 5278 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5279 sstartsj[0] = 0; rstartsj[0] = 0; 5280 len = 0; /* total length of j or a array to be sent */ 5281 k = 0; 5282 for (i=0; i<nsends; i++){ 5283 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5284 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5285 for (j=0; j<nrows; j++) { 5286 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5287 for (l=0; l<sbs; l++){ 5288 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5289 rowlen[j*sbs+l] = ncols; 5290 len += ncols; 5291 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5292 } 5293 k++; 5294 } 5295 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5296 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5297 } 5298 /* 
recvs and sends of i-array are completed */
5299     i = nrecvs;
5300     while (i--) {
5301       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5302     }
5303     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5304 
5305     /* allocate buffers for sending j and a arrays */
5306     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5307     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5308 
5309     /* create i-array of B_oth */
5310     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5311     b_othi[0] = 0;
5312     len = 0; /* total length of j or a array to be received */
5313     k = 0;
5314     for (i=0; i<nrecvs; i++){
5315       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5316       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5317       for (j=0; j<nrows; j++) {
5318         b_othi[k+1] = b_othi[k] + rowlen[j];
5319         len += rowlen[j]; k++;
5320       }
5321       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5322     }
5323 
5324     /* allocate space for j and a arrays of B_oth */
5325     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5326     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5327 
5328     /* j-array */
5329     /*---------*/
5330     /* post receives of j-array */
5331     for (i=0; i<nrecvs; i++){
5332       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5333       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5334     }
5335 
5336     /* pack the outgoing message j-array */
5337     k = 0;
5338     for (i=0; i<nsends; i++){
5339       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5340       bufJ = bufj+sstartsj[i];
5341       for (j=0; j<nrows; j++) {
5342         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5343         for (ll=0; ll<sbs; ll++){
5344           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5345           for (l=0; l<ncols; l++){
5346             *bufJ++ = cols[l];
5347           }
5348           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5349         }
5350       }
5351       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5352     }
5353 
5354     /* recvs and sends of j-array are completed */
5355     i = nrecvs;
5356     while (i--) {
5357       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5358     }
5359     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5360   } else if (scall == MAT_REUSE_MATRIX){
5361     sstartsj = *startsj_s;
5362     rstartsj = *startsj_r;
5363     bufa     = *bufa_ptr;
5364     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5365     b_otha   = b_oth->a;
5366   } else {
5367     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5368   }
5369 
5370   /* a-array */
5371   /*---------*/
5372   /* post receives of a-array */
5373   for (i=0; i<nrecvs; i++){
5374     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5375     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5376   }
5377 
5378   /* pack the outgoing message a-array */
5379   k = 0;
5380   for (i=0; i<nsends; i++){
5381     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5382     bufA = bufa+sstartsj[i];
5383     for (j=0; j<nrows; j++) {
5384       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5385       for (ll=0; ll<sbs; ll++){
5386         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5387         for (l=0; l<ncols; l++){
5388           *bufA++ = vals[l];
5389         }
5390         ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5391 } 5392 } 5393 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5394 } 5395 /* recvs and sends of a-array are completed */ 5396 i = nrecvs; 5397 while (i--) { 5398 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5399 } 5400 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5401 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5402 5403 if (scall == MAT_INITIAL_MATRIX){ 5404 /* put together the new matrix */ 5405 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5406 5407 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5408 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5409 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5410 b_oth->free_a = PETSC_TRUE; 5411 b_oth->free_ij = PETSC_TRUE; 5412 b_oth->nonew = 0; 5413 5414 ierr = PetscFree(bufj);CHKERRQ(ierr); 5415 if (!startsj_s || !bufa_ptr){ 5416 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5417 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5418 } else { 5419 *startsj_s = sstartsj; 5420 *startsj_r = rstartsj; 5421 *bufa_ptr = bufa; 5422 } 5423 } 5424 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5425 PetscFunctionReturn(0); 5426 } 5427 5428 #undef __FUNCT__ 5429 #define __FUNCT__ "MatGetCommunicationStructs" 5430 /*@C 5431 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5432 5433 Not Collective 5434 5435 Input Parameters: 5436 . A - The matrix in mpiaij format 5437 5438 Output Parameter: 5439 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5440 . 
colmap - A map from global column index to local index into lvec 5441 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5442 5443 Level: developer 5444 5445 @*/ 5446 #if defined (PETSC_USE_CTABLE) 5447 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5448 #else 5449 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5450 #endif 5451 { 5452 Mat_MPIAIJ *a; 5453 5454 PetscFunctionBegin; 5455 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5456 PetscValidPointer(lvec, 2); 5457 PetscValidPointer(colmap, 3); 5458 PetscValidPointer(multScatter, 4); 5459 a = (Mat_MPIAIJ *) A->data; 5460 if (lvec) *lvec = a->lvec; 5461 if (colmap) *colmap = a->colmap; 5462 if (multScatter) *multScatter = a->Mvctx; 5463 PetscFunctionReturn(0); 5464 } 5465 5466 EXTERN_C_BEGIN 5467 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5468 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5469 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5470 EXTERN_C_END 5471 5472 #undef __FUNCT__ 5473 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5474 /* 5475 Computes (B'*A')' since computing B*A directly is untenable 5476 5477 n p p 5478 ( ) ( ) ( ) 5479 m ( A ) * n ( B ) = m ( C ) 5480 ( ) ( ) ( ) 5481 5482 */ 5483 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5484 { 5485 PetscErrorCode ierr; 5486 Mat At,Bt,Ct; 5487 5488 PetscFunctionBegin; 5489 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5490 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5491 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5492 ierr = MatDestroy(&At);CHKERRQ(ierr); 5493 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5494 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5495 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5496 PetscFunctionReturn(0); 5497 } 5498 5499 #undef __FUNCT__ 5500 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5501 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5502 { 5503 PetscErrorCode ierr; 5504 PetscInt m=A->rmap->n,n=B->cmap->n; 5505 Mat Cmat; 5506 5507 PetscFunctionBegin; 5508 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5509 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5510 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5511 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5512 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5513 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5514 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5515 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5516 *C = Cmat; 5517 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5518 PetscFunctionReturn(0); 5519 } 5520 5521 /* ----------------------------------------------------------------*/ 5522 #undef __FUNCT__ 5523 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5524 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5525 { 5526 PetscErrorCode ierr; 5527 5528 PetscFunctionBegin; 5529 if (scall == MAT_INITIAL_MATRIX){ 5530 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5531 } 5532 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
5533 PetscFunctionReturn(0); 5534 } 5535 5536 EXTERN_C_BEGIN 5537 #if defined(PETSC_HAVE_MUMPS) 5538 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5539 #endif 5540 #if defined(PETSC_HAVE_PASTIX) 5541 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5542 #endif 5543 #if defined(PETSC_HAVE_SUPERLU_DIST) 5544 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5545 #endif 5546 #if defined(PETSC_HAVE_SPOOLES) 5547 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5548 #endif 5549 #if defined(PETSC_HAVE_CLIQUE) 5550 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5551 #endif 5552 EXTERN_C_END 5553 5554 /*MC 5555 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5556 5557 Options Database Keys: 5558 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5559 5560 Level: beginner 5561 5562 .seealso: MatCreateAIJ() 5563 M*/ 5564 5565 EXTERN_C_BEGIN 5566 #undef __FUNCT__ 5567 #define __FUNCT__ "MatCreate_MPIAIJ" 5568 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5569 { 5570 Mat_MPIAIJ *b; 5571 PetscErrorCode ierr; 5572 PetscMPIInt size; 5573 5574 PetscFunctionBegin; 5575 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5576 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5577 B->data = (void*)b; 5578 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5579 B->assembled = PETSC_FALSE; 5580 B->insertmode = NOT_SET_VALUES; 5581 b->size = size; 5582 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5583 5584 /* build cache for off array entries formed */ 5585 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5586 b->donotstash = PETSC_FALSE; 5587 b->colmap = 0; 5588 b->garray = 0; 5589 b->roworiented = PETSC_TRUE; 5590 5591 /* stuff used for matrix vector multiply */ 5592 b->lvec = PETSC_NULL; 5593 b->Mvctx = PETSC_NULL; 5594 5595 /* stuff for MatGetRow() */ 5596 b->rowindices = 0; 5597 b->rowvalues = 0; 5598 b->getrowactive = PETSC_FALSE; 5599 5600 /* flexible pointer used in CUSP/CUSPARSE classes */ 5601 b->spptr = PETSC_NULL; 5602 #if defined(PETSC_HAVE_SPOOLES) 5603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5604 "MatGetFactor_mpiaij_spooles", 5605 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5606 #endif 5607 #if defined(PETSC_HAVE_MUMPS) 5608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5609 "MatGetFactor_aij_mumps", 5610 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5611 #endif 5612 #if defined(PETSC_HAVE_PASTIX) 5613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5614 "MatGetFactor_mpiaij_pastix", 5615 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5616 #endif 5617 #if defined(PETSC_HAVE_SUPERLU_DIST) 5618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5619 "MatGetFactor_mpiaij_superlu_dist", 5620 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5621 #endif 5622 #if defined(PETSC_HAVE_CLIQUE) 5623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5624 "MatGetFactor_aij_clique", 5625 MatGetFactor_aij_clique);CHKERRQ(ierr); 5626 #endif 5627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5628 "MatStoreValues_MPIAIJ", 5629 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5630 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5631                                              "MatRetrieveValues_MPIAIJ",
5632                                              MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5633   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5634                                              "MatGetDiagonalBlock_MPIAIJ",
5635                                              MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5636   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5637                                              "MatIsTranspose_MPIAIJ",
5638                                              MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5640                                              "MatMPIAIJSetPreallocation_MPIAIJ",
5641                                              MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5643                                              "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5644                                              MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5645   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5646                                              "MatDiagonalScaleLocal_MPIAIJ",
5647                                              MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5649                                              "MatConvert_MPIAIJ_MPIAIJPERM",
5650                                              MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5651   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5652                                              "MatConvert_MPIAIJ_MPIAIJCRL",
5653                                              MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5654   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5655                                              "MatConvert_MPIAIJ_MPISBAIJ",
5656                                              MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5658                                              "MatMatMult_MPIDense_MPIAIJ",
5659                                              MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5660   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5661                                              "MatMatMultSymbolic_MPIDense_MPIAIJ",
5662                                              MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5664                                              "MatMatMultNumeric_MPIDense_MPIAIJ",
5665                                              MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5667   PetscFunctionReturn(0);
5668 }
5669 EXTERN_C_END
5670 
5671 #undef __FUNCT__
5672 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5673 /*@
5674      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5675        and "off-diagonal" part of the matrix in CSR format.
5676 
5677    Collective on MPI_Comm
5678 
5679    Input Parameters:
5680 +  comm - MPI communicator
5681 .  m - number of local rows (Cannot be PETSC_DECIDE)
5682 .  n - This value should be the same as the local size used in creating the
5683        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5684        calculated if N is given). For square matrices n is almost always m.
5685 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5686 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5687 .  i - row indices for "diagonal" portion of matrix
5688 .  j - column indices
5689 .  a - matrix values
5690 .  oi - row indices for "off-diagonal" portion of matrix
5691 .  oj - column indices
5692 -  oa - matrix values
5693 
5694    Output Parameter:
5695 .  mat - the matrix
5696 
5697    Level: advanced
5698 
5699    Notes:
5700        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.
5702 
5703        The i and j indices are 0-based
5704 
5705        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5706 
5707        This sets local rows and cannot be used to set off-processor values.
5708 
5709        You cannot later use MatSetValues() to change values in this matrix.
5710 
5711 .keywords: matrix, aij, compressed row, sparse, parallel
5712 
5713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5714           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5715 @*/
5716 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5717                                               PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5718 {
5719   PetscErrorCode ierr;
5720   Mat_MPIAIJ     *maij;
5721 
5722   PetscFunctionBegin;
5723   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5724   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5725   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5726   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5727   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5728   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5729   maij = (Mat_MPIAIJ*) (*mat)->data;
5730   maij->donotstash     = PETSC_TRUE;
5731   (*mat)->preallocated = PETSC_TRUE;
5732 
5733   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5734   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5735 
5736   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5737   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5738 
5739   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5740   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5741   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5742   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5743 
5744   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5745   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5746   PetscFunctionReturn(0);
5747 }
5748 
5749 /*
5750     Special version for direct calls from Fortran
5751 */
5752 #include <petsc-private/fortranimpl.h>
5753 
5754 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5755 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5756 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5757 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5758 #endif
5759 
5760 /* Change these macros so they can be used in a void function */
5761 #undef CHKERRQ
5762 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5763 #undef SETERRQ2
5764 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5765 #undef SETERRQ3
5766 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5767 #undef SETERRQ
5768 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5769 
5770 EXTERN_C_BEGIN
5771 #undef __FUNCT__
5772 #define __FUNCT__ "matsetvaluesmpiaij_"
5773 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5774 {
5775   Mat            mat = *mmat;
5776   PetscInt       m = *mm, n = *mn;
5777   InsertMode     addv = *maddv;
5778   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5779   PetscScalar    value;
5780   PetscErrorCode ierr;
5781 
5782 
MatCheckPreallocated(mat,1); 5783 if (mat->insertmode == NOT_SET_VALUES) { 5784 mat->insertmode = addv; 5785 } 5786 #if defined(PETSC_USE_DEBUG) 5787 else if (mat->insertmode != addv) { 5788 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5789 } 5790 #endif 5791 { 5792 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5793 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5794 PetscBool roworiented = aij->roworiented; 5795 5796 /* Some Variables required in the macro */ 5797 Mat A = aij->A; 5798 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5799 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5800 MatScalar *aa = a->a; 5801 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5802 Mat B = aij->B; 5803 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5804 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5805 MatScalar *ba = b->a; 5806 5807 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5808 PetscInt nonew = a->nonew; 5809 MatScalar *ap1,*ap2; 5810 5811 PetscFunctionBegin; 5812 for (i=0; i<m; i++) { 5813 if (im[i] < 0) continue; 5814 #if defined(PETSC_USE_DEBUG) 5815 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5816 #endif 5817 if (im[i] >= rstart && im[i] < rend) { 5818 row = im[i] - rstart; 5819 lastcol1 = -1; 5820 rp1 = aj + ai[row]; 5821 ap1 = aa + ai[row]; 5822 rmax1 = aimax[row]; 5823 nrow1 = ailen[row]; 5824 low1 = 0; 5825 high1 = nrow1; 5826 lastcol2 = -1; 5827 rp2 = bj + bi[row]; 5828 ap2 = ba + bi[row]; 5829 rmax2 = bimax[row]; 5830 nrow2 = bilen[row]; 5831 low2 = 0; 5832 high2 = nrow2; 5833 5834 for (j=0; j<n; j++) { 5835 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5836 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5837 if (in[j] >= cstart && in[j] < cend){ 5838 col = in[j] - cstart; 5839 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5840 } else if (in[j] < 0) continue; 5841 #if defined(PETSC_USE_DEBUG) 5842 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5843 #endif 5844 else { 5845 if (mat->was_assembled) { 5846 if (!aij->colmap) { 5847 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5848 } 5849 #if defined (PETSC_USE_CTABLE) 5850 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5851 col--; 5852 #else 5853 col = aij->colmap[in[j]] - 1; 5854 #endif 5855 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5856 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5857 col = in[j]; 5858 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5859 B = aij->B; 5860 b = (Mat_SeqAIJ*)B->data; 5861 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5862 rp2 = bj + bi[row]; 5863 ap2 = ba + bi[row]; 5864 rmax2 = bimax[row]; 5865 nrow2 = bilen[row]; 5866 low2 = 0; 5867 high2 = nrow2; 5868 bm = aij->B->rmap->n; 5869 ba = b->a; 5870 } 5871 } else col = in[j]; 5872 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5873 } 5874 } 5875 } else { 5876 if (!aij->donotstash) { 5877 if (roworiented) { 5878 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5879 } else { 5880 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5881 } 5882 } 5883 } 5884 }} 5885 PetscFunctionReturnVoid(); 5886 } 5887 EXTERN_C_END 5888 5889
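/*
   Editor's note - usage sketch (not compiled) for MatCreateMPIAIJWithSplitArrays()
   defined above: two processes, each owning one row of the 2x2 identity, so the
   "off-diagonal" block is empty on both ranks. All arrays and names are
   illustrative assumptions.

      Mat         Ident;
      PetscInt    di[2] = {0,1}, dj[1] = {0};    diagonal block: one entry in local column 0
      PetscScalar da[1] = {1.0};
      PetscInt    oi[2] = {0,0}, oj[1] = {0};    off-diagonal block: no entries
      PetscScalar oa[1] = {0.0};
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,di,dj,da,oi,oj,oa,&Ident);CHKERRQ(ierr);
      the arrays are not copied and must remain valid until Ident is destroyed
*/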