#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
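/*
   Example (added commentary; the matrix size and the nonzero estimates below are illustrative
   assumptions, not part of the library): following the recommendation above, call both
   preallocation routines so the same code runs unchanged on one or many MPI processes.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         (used on a one-process communicator)
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  (used on a multi-process communicator)
     then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual.
*/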
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array, but it is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }
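/*
   Added commentary (not in the original source): MatSetValues_MPIAIJ() below routes every entry
   either into the "diagonal" block aij->A (columns owned by this process) or into the
   "off-diagonal" block aij->B (all other columns); the two macros above then perform the per-row
   sorted insertion for those blocks: a short binary search narrows the range, a linear scan
   locates the column, and when a genuinely new nonzero is created the tail of the row is shifted
   up by one slot (reallocating first if the row is full). Roughly, with cstart/cend standing for
   mat->cmap->rstart and mat->cmap->rend:

     if (col >= cstart && col < cend)  insert (row, col-cstart)             into aij->A
     else                              insert (row, local index via colmap) into aij->B
*/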
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A      = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa    = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B      = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba    = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1455 if (iascii) { 1456 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1457 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1458 MatInfo info; 1459 PetscBool inodes; 1460 1461 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1462 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1463 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1465 if (!inodes) { 1466 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1467 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1468 } else { 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1470 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1471 } 1472 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1473 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1474 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1479 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1482 PetscInt inodecount,inodelimit,*inodes; 1483 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1484 if (inodes) { 1485 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1486 } else { 1487 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1488 } 1489 PetscFunctionReturn(0); 1490 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1491 PetscFunctionReturn(0); 1492 } 1493 } else if (isbinary) { 1494 if (size == 1) { 1495 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1496 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1497 } else { 1498 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1499 } 1500 PetscFunctionReturn(0); 1501 } else if (isdraw) { 1502 PetscDraw draw; 1503 PetscBool isnull; 1504 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1505 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1506 } 1507 1508 if (size == 1) { 1509 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1510 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1511 } else { 1512 /* assemble the entire matrix onto first processor. 
*/ 1513 Mat A; 1514 Mat_SeqAIJ *Aloc; 1515 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1516 MatScalar *a; 1517 1518 if (mat->rmap->N > 1024) { 1519 PetscBool flg = PETSC_FALSE; 1520 1521 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1522 if (!flg) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1523 } 1524 1525 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1526 if (!rank) { 1527 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1528 } else { 1529 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1530 } 1531 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1532 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1533 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1534 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1535 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1536 1537 /* copy over the A part */ 1538 Aloc = (Mat_SeqAIJ*)aij->A->data; 1539 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1540 row = mat->rmap->rstart; 1541 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1542 for (i=0; i<m; i++) { 1543 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1544 row++; 1545 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1546 } 1547 aj = Aloc->j; 1548 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1549 1550 /* copy over the B part */ 1551 Aloc = (Mat_SeqAIJ*)aij->B->data; 1552 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1553 row = mat->rmap->rstart; 1554 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1555 ct = cols; 1556 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1557 for (i=0; i<m; i++) { 1558 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1559 row++; 1560 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1561 } 1562 ierr = PetscFree(ct);CHKERRQ(ierr); 1563 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1564 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1565 /* 1566 Everyone has to call to draw the matrix since the graphics waits are 1567 synchronized across all processors that share the PetscDraw object 1568 */ 1569 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1570 if (!rank) { 1571 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1572 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1573 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1574 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1575 } 1576 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1577 ierr = MatDestroy(&A);CHKERRQ(ierr); 1578 } 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatView_MPIAIJ" 1584 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1585 { 1586 PetscErrorCode ierr; 1587 PetscBool iascii,isdraw,issocket,isbinary; 1588 1589 PetscFunctionBegin; 1590 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1591 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1592 
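  /*
     For ASCII and draw viewers on more than one process the code above assembles a copy of
     the whole matrix onto process 0 (a temporary MATMPIAIJ whose rows all live on rank 0)
     and views its local SeqAIJ block through the singleton viewer; this is why ASCII output
     of matrices with more than 1024 rows is refused unless -mat_ascii_output_large is given.
     A minimal way to reach this path from user code, assuming an assembled MATMPIAIJ mat:

        ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  */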
ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1594 if (iascii || isdraw || isbinary || issocket) { 1595 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatSOR_MPIAIJ" 1602 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1603 { 1604 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1605 PetscErrorCode ierr; 1606 Vec bb1 = 0; 1607 PetscBool hasop; 1608 1609 PetscFunctionBegin; 1610 if (flag == SOR_APPLY_UPPER) { 1611 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1612 PetscFunctionReturn(0); 1613 } 1614 1615 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1616 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1617 } 1618 1619 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1620 if (flag & SOR_ZERO_INITIAL_GUESS) { 1621 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1622 its--; 1623 } 1624 1625 while (its--) { 1626 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1627 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 1629 /* update rhs: bb1 = bb - B*x */ 1630 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1631 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1632 1633 /* local sweep */ 1634 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1635 } 1636 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1637 if (flag & SOR_ZERO_INITIAL_GUESS) { 1638 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1639 its--; 1640 } 1641 while (its--) { 1642 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 1645 /* update rhs: bb1 = bb - B*x */ 1646 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1647 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1648 1649 /* local sweep */ 1650 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1651 } 1652 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1653 if (flag & SOR_ZERO_INITIAL_GUESS) { 1654 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1655 its--; 1656 } 1657 while (its--) { 1658 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1659 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 1661 /* update rhs: bb1 = bb - B*x */ 1662 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1663 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1664 1665 /* local sweep */ 1666 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1667 } 1668 } else if (flag & SOR_EISENSTAT) { 1669 Vec xx1; 1670 1671 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1672 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1673 1674 ierr = 
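  /*
     Each of the SOR_LOCAL_* branches above follows the same pattern: scatter the current
     iterate xx into mat->lvec (the needed off-process values), form the modified right-hand
     side bb1 = bb - B*lvec, and run the sequential SOR kernel of the diagonal block A on bb1.
     Only A is relaxed; the coupling through the off-diagonal block B is treated explicitly.
     Schematically, one outer iteration is

        lvec <- ghost values of xx
        bb1  <- bb - B*lvec
        xx   <- SOR(A, bb1, omega, fshift, lits local sweeps)
  */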
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1675 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1676 if (!mat->diag) { 1677 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1678 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1679 } 1680 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1681 if (hasop) { 1682 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1683 } else { 1684 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1685 } 1686 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1687 1688 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1689 1690 /* local sweep */ 1691 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1692 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1693 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1694 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1695 1696 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatPermute_MPIAIJ" 1702 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1703 { 1704 Mat aA,aB,Aperm; 1705 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1706 PetscScalar *aa,*ba; 1707 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1708 PetscSF rowsf,sf; 1709 IS parcolp = NULL; 1710 PetscBool done; 1711 PetscErrorCode ierr; 1712 1713 PetscFunctionBegin; 1714 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1715 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1716 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1717 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1718 1719 /* Invert row permutation to find out where my rows should go */ 1720 ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr); 1721 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1722 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1723 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1724 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1725 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1726 1727 /* Invert column permutation to find out where my columns should go */ 1728 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1729 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1730 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1731 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1732 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1733 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1734 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1735 1736 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1737 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1738 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1739 1740 /* Find out where my gcols should go */ 1741 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1742 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1743 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1744 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1745 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1746 ierr 
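  /*
     The two star forests above invert the requested permutations: rowsf finds the destination
     row of every locally owned row (rdest) and sf the destination of every locally owned
     column (cdest); the broadcast below fills gcdest, the permuted global indices of the
     ghost columns listed in garray.  With rdest, cdest and gcdest in hand the routine counts
     each row's diagonal and off-diagonal nonzeros, ships those counts to the owners of the
     permuted rows, preallocates the permuted matrix exactly, and fills it with MatSetValues().
  */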
= PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1747 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1748 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1749 1750 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1751 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1752 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1753 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1754 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1755 for (i=0; i<m; i++) { 1756 PetscInt row = rdest[i],rowner; 1757 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1758 for (j=ai[i]; j<ai[i+1]; j++) { 1759 PetscInt cowner,col = cdest[aj[j]]; 1760 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1761 if (rowner == cowner) dnnz[i]++; 1762 else onnz[i]++; 1763 } 1764 for (j=bi[i]; j<bi[i+1]; j++) { 1765 PetscInt cowner,col = gcdest[bj[j]]; 1766 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1767 if (rowner == cowner) dnnz[i]++; 1768 else onnz[i]++; 1769 } 1770 } 1771 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1772 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1773 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1774 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1777 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1778 1779 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1780 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1781 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1782 for (i=0; i<m; i++) { 1783 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1784 PetscInt rowlen; 1785 rowlen = ai[i+1] - ai[i]; 1786 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1787 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1788 rowlen = bi[i+1] - bi[i]; 1789 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1790 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1791 } 1792 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1793 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1794 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1795 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1796 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1797 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1798 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1799 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1800 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1801 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1802 *B = Aperm; 1803 PetscFunctionReturn(0); 1804 } 1805 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1808 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1809 { 1810 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1811 Mat A = mat->A,B = mat->B; 1812 PetscErrorCode ierr; 1813 PetscReal isend[5],irecv[5]; 1814 1815 PetscFunctionBegin; 1816 info->block_size = 1.0; 1817 ierr = 
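  /*
     This routine adds up the statistics of the diagonal block A and the off-diagonal block B
     and then, depending on the flag, returns the local totals (MAT_LOCAL) or reduces them over
     the communicator with MPIU_MAX (MAT_GLOBAL_MAX) or MPIU_SUM (MAT_GLOBAL_SUM).  A minimal
     usage sketch, assuming an assembled matrix mat:

        MatInfo info;
        ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);
  */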
MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1818 1819 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1820 isend[3] = info->memory; isend[4] = info->mallocs; 1821 1822 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1823 1824 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1825 isend[3] += info->memory; isend[4] += info->mallocs; 1826 if (flag == MAT_LOCAL) { 1827 info->nz_used = isend[0]; 1828 info->nz_allocated = isend[1]; 1829 info->nz_unneeded = isend[2]; 1830 info->memory = isend[3]; 1831 info->mallocs = isend[4]; 1832 } else if (flag == MAT_GLOBAL_MAX) { 1833 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1834 1835 info->nz_used = irecv[0]; 1836 info->nz_allocated = irecv[1]; 1837 info->nz_unneeded = irecv[2]; 1838 info->memory = irecv[3]; 1839 info->mallocs = irecv[4]; 1840 } else if (flag == MAT_GLOBAL_SUM) { 1841 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1842 1843 info->nz_used = irecv[0]; 1844 info->nz_allocated = irecv[1]; 1845 info->nz_unneeded = irecv[2]; 1846 info->memory = irecv[3]; 1847 info->mallocs = irecv[4]; 1848 } 1849 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1850 info->fill_ratio_needed = 0; 1851 info->factor_mallocs = 0; 1852 PetscFunctionReturn(0); 1853 } 1854 1855 #undef __FUNCT__ 1856 #define __FUNCT__ "MatSetOption_MPIAIJ" 1857 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1858 { 1859 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1860 PetscErrorCode ierr; 1861 1862 PetscFunctionBegin; 1863 switch (op) { 1864 case MAT_NEW_NONZERO_LOCATIONS: 1865 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1866 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1867 case MAT_KEEP_NONZERO_PATTERN: 1868 case MAT_NEW_NONZERO_LOCATION_ERR: 1869 case MAT_USE_INODES: 1870 case MAT_IGNORE_ZERO_ENTRIES: 1871 MatCheckPreallocated(A,1); 1872 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1873 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1874 break; 1875 case MAT_ROW_ORIENTED: 1876 a->roworiented = flg; 1877 1878 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1879 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1880 break; 1881 case MAT_NEW_DIAGONALS: 1882 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1883 break; 1884 case MAT_IGNORE_OFF_PROC_ENTRIES: 1885 a->donotstash = flg; 1886 break; 1887 case MAT_SPD: 1888 A->spd_set = PETSC_TRUE; 1889 A->spd = flg; 1890 if (flg) { 1891 A->symmetric = PETSC_TRUE; 1892 A->structurally_symmetric = PETSC_TRUE; 1893 A->symmetric_set = PETSC_TRUE; 1894 A->structurally_symmetric_set = PETSC_TRUE; 1895 } 1896 break; 1897 case MAT_SYMMETRIC: 1898 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1899 break; 1900 case MAT_STRUCTURALLY_SYMMETRIC: 1901 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1902 break; 1903 case MAT_HERMITIAN: 1904 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1905 break; 1906 case MAT_SYMMETRY_ETERNAL: 1907 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1908 break; 1909 default: 1910 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatGetRow_MPIAIJ" 1917 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1918 { 1919 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1920 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1921 PetscErrorCode ierr; 1922 PetscInt 
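  /*
     A row of the global matrix is returned by fetching the corresponding row of the diagonal
     block A and of the off-diagonal block B and merging them in increasing global column
     order: B's columns that lie below cstart, then A's columns shifted by cstart, then B's
     remaining columns (B's local column numbers are translated through garray).  The work
     buffers mat->rowvalues/mat->rowindices are sized once for the longest local row, and the
     getrowactive flag enforces that MatRestoreRow() is called before the next MatGetRow().
     A minimal usage sketch, assuming grow is a locally owned global row of mat:

        const PetscInt    *cols;
        const PetscScalar *vals;
        PetscInt          ncols;
        ierr = MatGetRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
  */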
i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1923 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1924 PetscInt *cmap,*idx_p; 1925 1926 PetscFunctionBegin; 1927 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1928 mat->getrowactive = PETSC_TRUE; 1929 1930 if (!mat->rowvalues && (idx || v)) { 1931 /* 1932 allocate enough space to hold information from the longest row. 1933 */ 1934 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1935 PetscInt max = 1,tmp; 1936 for (i=0; i<matin->rmap->n; i++) { 1937 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1938 if (max < tmp) max = tmp; 1939 } 1940 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1941 } 1942 1943 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1944 lrow = row - rstart; 1945 1946 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1947 if (!v) {pvA = 0; pvB = 0;} 1948 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1949 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1950 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1951 nztot = nzA + nzB; 1952 1953 cmap = mat->garray; 1954 if (v || idx) { 1955 if (nztot) { 1956 /* Sort by increasing column numbers, assuming A and B already sorted */ 1957 PetscInt imark = -1; 1958 if (v) { 1959 *v = v_p = mat->rowvalues; 1960 for (i=0; i<nzB; i++) { 1961 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1962 else break; 1963 } 1964 imark = i; 1965 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1966 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1967 } 1968 if (idx) { 1969 *idx = idx_p = mat->rowindices; 1970 if (imark > -1) { 1971 for (i=0; i<imark; i++) { 1972 idx_p[i] = cmap[cworkB[i]]; 1973 } 1974 } else { 1975 for (i=0; i<nzB; i++) { 1976 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1977 else break; 1978 } 1979 imark = i; 1980 } 1981 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1982 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1983 } 1984 } else { 1985 if (idx) *idx = 0; 1986 if (v) *v = 0; 1987 } 1988 } 1989 *nz = nztot; 1990 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1991 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1992 PetscFunctionReturn(0); 1993 } 1994 1995 #undef __FUNCT__ 1996 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1997 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1998 { 1999 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2000 2001 PetscFunctionBegin; 2002 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2003 aij->getrowactive = PETSC_FALSE; 2004 PetscFunctionReturn(0); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatNorm_MPIAIJ" 2009 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2010 { 2011 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2012 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2013 PetscErrorCode ierr; 2014 PetscInt i,j,cstart = mat->cmap->rstart; 2015 PetscReal sum = 0.0; 2016 MatScalar *v; 2017 2018 PetscFunctionBegin; 2019 if (aij->size == 1) { 2020 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2021 } else { 2022 if (type == NORM_FROBENIUS) { 2023 v = amat->a; 2024 for (i=0; i<amat->nz; i++) { 2025 sum += 
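  /*
     The three supported norms are assembled from the two sequential blocks:
       NORM_FROBENIUS: sum |a_ij|^2 over A and B locally, MPI_Allreduce with MPIU_SUM, sqrt;
       NORM_1:         accumulate column sums in a work array of length cmap->N (off-diagonal
                       columns mapped through garray), allreduce, take the largest entry;
       NORM_INFINITY:  form each local row sum from A and B, take the local maximum, allreduce
                       with MPIU_MAX.
     A usage sketch, assuming an assembled matrix mat:

        PetscReal nrm;
        ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
  */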
PetscRealPart(PetscConj(*v)*(*v)); v++; 2026 } 2027 v = bmat->a; 2028 for (i=0; i<bmat->nz; i++) { 2029 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2030 } 2031 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2032 *norm = PetscSqrtReal(*norm); 2033 } else if (type == NORM_1) { /* max column norm */ 2034 PetscReal *tmp,*tmp2; 2035 PetscInt *jj,*garray = aij->garray; 2036 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2037 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2038 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2039 *norm = 0.0; 2040 v = amat->a; jj = amat->j; 2041 for (j=0; j<amat->nz; j++) { 2042 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2043 } 2044 v = bmat->a; jj = bmat->j; 2045 for (j=0; j<bmat->nz; j++) { 2046 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2047 } 2048 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2049 for (j=0; j<mat->cmap->N; j++) { 2050 if (tmp2[j] > *norm) *norm = tmp2[j]; 2051 } 2052 ierr = PetscFree(tmp);CHKERRQ(ierr); 2053 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2054 } else if (type == NORM_INFINITY) { /* max row norm */ 2055 PetscReal ntemp = 0.0; 2056 for (j=0; j<aij->A->rmap->n; j++) { 2057 v = amat->a + amat->i[j]; 2058 sum = 0.0; 2059 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2060 sum += PetscAbsScalar(*v); v++; 2061 } 2062 v = bmat->a + bmat->i[j]; 2063 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2064 sum += PetscAbsScalar(*v); v++; 2065 } 2066 if (sum > ntemp) ntemp = sum; 2067 } 2068 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2069 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatTranspose_MPIAIJ" 2076 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2077 { 2078 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2079 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2080 PetscErrorCode ierr; 2081 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2082 PetscInt cstart = A->cmap->rstart,ncol; 2083 Mat B; 2084 MatScalar *array; 2085 2086 PetscFunctionBegin; 2087 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2088 2089 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2090 ai = Aloc->i; aj = Aloc->j; 2091 bi = Bloc->i; bj = Bloc->j; 2092 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2093 PetscInt *d_nnz,*g_nnz,*o_nnz; 2094 PetscSFNode *oloc; 2095 PETSC_UNUSED PetscSF sf; 2096 2097 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2098 /* compute d_nnz for preallocation */ 2099 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2100 for (i=0; i<ai[ma]; i++) { 2101 d_nnz[aj[i]]++; 2102 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2103 } 2104 /* compute local off-diagonal contributions */ 2105 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2106 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2107 /* map those to global */ 2108 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2109 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2110 ierr 
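  /*
     For MAT_INITIAL_MATRIX the transpose is preallocated exactly: d_nnz counts, per local
     column, the entries contributed by the diagonal block, g_nnz counts the entries of the
     off-diagonal block per ghost column, and the star forest reduction below sums those ghost
     counts into o_nnz on the processes that own the corresponding columns.  Each local row is
     then inserted as a column of the new matrix with MatSetValues().
  */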
= PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2111 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2112 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2113 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2114 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2115 2116 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2117 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2118 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2119 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2120 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2121 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2122 } else { 2123 B = *matout; 2124 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2125 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2126 } 2127 2128 /* copy over the A part */ 2129 array = Aloc->a; 2130 row = A->rmap->rstart; 2131 for (i=0; i<ma; i++) { 2132 ncol = ai[i+1]-ai[i]; 2133 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2134 row++; 2135 array += ncol; aj += ncol; 2136 } 2137 aj = Aloc->j; 2138 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2139 2140 /* copy over the B part */ 2141 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2142 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2143 array = Bloc->a; 2144 row = A->rmap->rstart; 2145 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2146 cols_tmp = cols; 2147 for (i=0; i<mb; i++) { 2148 ncol = bi[i+1]-bi[i]; 2149 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2150 row++; 2151 array += ncol; cols_tmp += ncol; 2152 } 2153 ierr = PetscFree(cols);CHKERRQ(ierr); 2154 2155 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2158 *matout = B; 2159 } else { 2160 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2161 } 2162 PetscFunctionReturn(0); 2163 } 2164 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2167 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2168 { 2169 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2170 Mat a = aij->A,b = aij->B; 2171 PetscErrorCode ierr; 2172 PetscInt s1,s2,s3; 2173 2174 PetscFunctionBegin; 2175 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2176 if (rr) { 2177 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2178 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2179 /* Overlap communication with computation. 
*/ 2180 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2181 } 2182 if (ll) { 2183 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2184 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2185 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2186 } 2187 /* scale the diagonal block */ 2188 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2189 2190 if (rr) { 2191 /* Do a scatter end and then right scale the off-diagonal block */ 2192 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2193 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2194 } 2195 PetscFunctionReturn(0); 2196 } 2197 2198 #undef __FUNCT__ 2199 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2200 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2201 { 2202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2203 PetscErrorCode ierr; 2204 2205 PetscFunctionBegin; 2206 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2207 PetscFunctionReturn(0); 2208 } 2209 2210 #undef __FUNCT__ 2211 #define __FUNCT__ "MatEqual_MPIAIJ" 2212 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2213 { 2214 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2215 Mat a,b,c,d; 2216 PetscBool flg; 2217 PetscErrorCode ierr; 2218 2219 PetscFunctionBegin; 2220 a = matA->A; b = matA->B; 2221 c = matB->A; d = matB->B; 2222 2223 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2224 if (flg) { 2225 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2226 } 2227 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2228 PetscFunctionReturn(0); 2229 } 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatCopy_MPIAIJ" 2233 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2234 { 2235 PetscErrorCode ierr; 2236 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2237 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2238 2239 PetscFunctionBegin; 2240 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2241 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2242 /* because of the column compression in the off-processor part of the matrix a->B, 2243 the number of columns in a->B and b->B may be different, hence we cannot call 2244 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2245 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2246 then copying the submatrices */ 2247 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2248 } else { 2249 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2250 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2251 } 2252 PetscFunctionReturn(0); 2253 } 2254 2255 #undef __FUNCT__ 2256 #define __FUNCT__ "MatSetUp_MPIAIJ" 2257 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2258 { 2259 PetscErrorCode ierr; 2260 2261 PetscFunctionBegin; 2262 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2268 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2269 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2270 { 2271 PetscInt i,m=Y->rmap->N; 2272 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2273 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2274 const PetscInt *xi = x->i,*yi = y->i; 2275 2276 PetscFunctionBegin; 2277 /* Set the number of nonzeros in the new matrix */ 2278 for (i=0; i<m; i++) { 2279 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2280 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2281 nnz[i] = 0; 2282 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2283 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2284 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2285 nnz[i]++; 2286 } 2287 for (; k<nzy; k++) nnz[i]++; 2288 } 2289 PetscFunctionReturn(0); 2290 } 2291 2292 #undef __FUNCT__ 2293 #define __FUNCT__ "MatAXPY_MPIAIJ" 2294 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2295 { 2296 PetscErrorCode ierr; 2297 PetscInt i; 2298 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2299 PetscBLASInt bnz,one=1; 2300 Mat_SeqAIJ *x,*y; 2301 2302 PetscFunctionBegin; 2303 if (str == SAME_NONZERO_PATTERN) { 2304 PetscScalar alpha = a; 2305 x = (Mat_SeqAIJ*)xx->A->data; 2306 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2307 y = (Mat_SeqAIJ*)yy->A->data; 2308 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2309 x = (Mat_SeqAIJ*)xx->B->data; 2310 y = (Mat_SeqAIJ*)yy->B->data; 2311 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2312 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2313 } else if (str == SUBSET_NONZERO_PATTERN) { 2314 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2315 2316 x = (Mat_SeqAIJ*)xx->B->data; 2317 y = (Mat_SeqAIJ*)yy->B->data; 2318 if (y->xtoy && y->XtoY != xx->B) { 2319 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2320 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2321 } 2322 if (!y->xtoy) { /* get xtoy */ 2323 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2324 y->XtoY = xx->B; 2325 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2326 } 2327 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2328 } else { 2329 Mat B; 2330 PetscInt *nnz_d,*nnz_o; 2331 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2332 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2333 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2334 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2335 ierr = 
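  /*
     MatAXPY_MPIAIJ computes Y = Y + a*X in one of three ways: with SAME_NONZERO_PATTERN the
     values of both blocks are combined directly with BLAS axpy; with SUBSET_NONZERO_PATTERN a
     map (xtoy) from X's nonzeros into Y's is built once and reused; otherwise (this branch) a
     new matrix with the merged nonzero pattern is preallocated, filled by
     MatAXPY_BasicWithPreallocation(), and swapped into Y by MatHeaderReplace().  A usage
     sketch, assuming matrices X and Y with identical row and column layouts:

        ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  */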
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2336 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2337 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2338 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2339 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2340 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2341 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2342 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2343 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2344 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2345 } 2346 PetscFunctionReturn(0); 2347 } 2348 2349 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2350 2351 #undef __FUNCT__ 2352 #define __FUNCT__ "MatConjugate_MPIAIJ" 2353 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2354 { 2355 #if defined(PETSC_USE_COMPLEX) 2356 PetscErrorCode ierr; 2357 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2358 2359 PetscFunctionBegin; 2360 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2361 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2362 #else 2363 PetscFunctionBegin; 2364 #endif 2365 PetscFunctionReturn(0); 2366 } 2367 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatRealPart_MPIAIJ" 2370 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2371 { 2372 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2373 PetscErrorCode ierr; 2374 2375 PetscFunctionBegin; 2376 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2377 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2383 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2384 { 2385 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2386 PetscErrorCode ierr; 2387 2388 PetscFunctionBegin; 2389 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2390 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #if defined(PETSC_HAVE_PBGL) 2395 2396 #include <boost/parallel/mpi/bsp_process_group.hpp> 2397 #include <boost/graph/distributed/ilu_default_graph.hpp> 2398 #include <boost/graph/distributed/ilu_0_block.hpp> 2399 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2400 #include <boost/graph/distributed/petsc/interface.hpp> 2401 #include <boost/multi_array.hpp> 2402 #include <boost/parallel/distributed_property_map->hpp> 2403 2404 #undef __FUNCT__ 2405 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2406 /* 2407 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2408 */ 2409 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2410 { 2411 namespace petsc = boost::distributed::petsc; 2412 2413 namespace graph_dist = boost::graph::distributed; 2414 using boost::graph::distributed::ilu_default::process_group_type; 2415 using boost::graph::ilu_permuted; 2416 2417 PetscBool row_identity, col_identity; 2418 PetscContainer c; 2419 PetscInt m, n, M, N; 2420 PetscErrorCode ierr; 2421 2422 PetscFunctionBegin; 2423 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2424 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2425 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2426 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2427 2428 process_group_type pg; 2429 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2430 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2431 lgraph_type& level_graph = *lgraph_p; 2432 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2433 2434 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2435 ilu_permuted(level_graph); 2436 2437 /* put together the new matrix */ 2438 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2439 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2440 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2441 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2442 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2443 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2444 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2445 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2446 2447 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2448 ierr = PetscContainerSetPointer(c, lgraph_p); 2449 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2450 ierr = PetscContainerDestroy(&c); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2456 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2457 { 2458 PetscFunctionBegin; 2459 PetscFunctionReturn(0); 2460 } 2461 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatSolve_MPIAIJ" 2464 /* 2465 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2466 */ 2467 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2468 { 2469 namespace graph_dist = boost::graph::distributed; 2470 2471 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2472 lgraph_type *lgraph_p; 2473 PetscContainer c; 2474 PetscErrorCode ierr; 2475 2476 PetscFunctionBegin; 2477 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2478 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2479 ierr = VecCopy(b, x);CHKERRQ(ierr); 2480 2481 PetscScalar *array_x; 2482 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2483 PetscInt sx; 2484 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2485 2486 PetscScalar *array_b; 2487 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2488 PetscInt sb; 2489 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2490 2491 lgraph_type& level_graph = *lgraph_p; 2492 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2493 2494 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2495 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2496 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2497 2498 typedef boost::iterator_property_map<array_ref_type::iterator, 2499 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2500 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2501 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2502 2503 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2504 PetscFunctionReturn(0); 2505 } 2506 #endif 2507 2508 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2509 PetscInt nzlocal,nsends,nrecvs; 2510 PetscMPIInt *send_rank,*recv_rank; 2511 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2512 PetscScalar *sbuf_a,**rbuf_a; 2513 PetscErrorCode (*Destroy)(Mat); 2514 } Mat_Redundant; 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2518 PetscErrorCode 
PetscContainerDestroy_MatRedundant(void *ptr) 2519 { 2520 PetscErrorCode ierr; 2521 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2522 PetscInt i; 2523 2524 PetscFunctionBegin; 2525 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2526 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2527 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2528 for (i=0; i<redund->nrecvs; i++) { 2529 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2530 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2531 } 2532 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2533 ierr = PetscFree(redund);CHKERRQ(ierr); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 #undef __FUNCT__ 2538 #define __FUNCT__ "MatDestroy_MatRedundant" 2539 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2540 { 2541 PetscErrorCode ierr; 2542 PetscContainer container; 2543 Mat_Redundant *redund=NULL; 2544 2545 PetscFunctionBegin; 2546 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2547 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2548 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2549 2550 A->ops->destroy = redund->Destroy; 2551 2552 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2553 if (A->ops->destroy) { 2554 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2555 } 2556 PetscFunctionReturn(0); 2557 } 2558 2559 #undef __FUNCT__ 2560 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2561 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2562 { 2563 PetscMPIInt rank,size; 2564 MPI_Comm comm=((PetscObject)mat)->comm; 2565 PetscErrorCode ierr; 2566 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2567 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2568 PetscInt *rowrange = mat->rmap->range; 2569 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2570 Mat A = aij->A,B=aij->B,C=*matredundant; 2571 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2572 PetscScalar *sbuf_a; 2573 PetscInt nzlocal=a->nz+b->nz; 2574 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2575 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2576 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2577 MatScalar *aworkA,*aworkB; 2578 PetscScalar *vals; 2579 PetscMPIInt tag1,tag2,tag3,imdex; 2580 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2581 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2582 MPI_Status recv_status,*send_status; 2583 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2584 PetscInt **rbuf_j=NULL; 2585 PetscScalar **rbuf_a=NULL; 2586 Mat_Redundant *redund =NULL; 2587 PetscContainer container; 2588 2589 PetscFunctionBegin; 2590 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2591 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2592 2593 if (reuse == MAT_REUSE_MATRIX) { 2594 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2595 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2596 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2597 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2598 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2599 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2600 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2601 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2602 2603 nsends = redund->nsends; 2604 nrecvs = redund->nrecvs; 2605 send_rank = redund->send_rank; 2606 recv_rank = redund->recv_rank; 2607 sbuf_nz = redund->sbuf_nz; 2608 rbuf_nz = redund->rbuf_nz; 2609 sbuf_j = redund->sbuf_j; 2610 sbuf_a = redund->sbuf_a; 2611 rbuf_j = redund->rbuf_j; 2612 rbuf_a = redund->rbuf_a; 2613 } 2614 2615 if (reuse == MAT_INITIAL_MATRIX) { 2616 PetscMPIInt subrank,subsize; 2617 PetscInt nleftover,np_subcomm; 2618 /* get the destination processors' id send_rank, nsends and nrecvs */ 2619 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2620 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2621 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2622 2623 np_subcomm = size/nsubcomm; 2624 nleftover = size - nsubcomm*np_subcomm; 2625 2626 nsends = 0; nrecvs = 0; 2627 for (i=0; i<size; i++) { /* i=rank*/ 2628 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2629 send_rank[nsends] = i; nsends++; 2630 recv_rank[nrecvs++] = i; 2631 } 2632 } 2633 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2634 i = size-nleftover-1; 2635 j = 0; 2636 while (j < nsubcomm - nleftover) { 2637 send_rank[nsends++] = i; 2638 i--; j++; 2639 } 2640 } 2641 2642 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2643 for (i=0; i<nleftover; i++) { 2644 recv_rank[nrecvs++] = size-nleftover+i; 2645 } 2646 } 2647 2648 /* allocate sbuf_j, sbuf_a */ 2649 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2650 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2651 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2652 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2653 2654 /* copy mat's local entries into the buffers */ 2655 if (reuse == MAT_INITIAL_MATRIX) { 2656 rownz_max = 0; 2657 rptr = sbuf_j; 2658 cols = sbuf_j + rend-rstart + 1; 2659 vals = sbuf_a; 2660 rptr[0] = 0; 2661 for (i=0; i<rend-rstart; i++) { 2662 row = i + rstart; 2663 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2664 ncols = nzA + nzB; 2665 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2666 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2667 /* load the column indices for this row into cols */ 2668 lwrite = 0; 2669 for (l=0; l<nzB; l++) { 2670 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2671 vals[lwrite] = aworkB[l]; 2672 cols[lwrite++] = ctmp; 2673 } 2674 } 2675 for (l=0; l<nzA; l++) { 2676 vals[lwrite] = aworkA[l]; 2677 cols[lwrite++] = cstart + cworkA[l]; 2678 } 2679 for (l=0; l<nzB; l++) { 2680 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2681 vals[lwrite] = aworkB[l]; 2682 cols[lwrite++] = ctmp; 2683 } 2684 } 2685 vals += ncols; 2686 cols += ncols; 2687 rptr[i+1] = rptr[i] + ncols; 2688 if (rownz_max < ncols) rownz_max = ncols; 2689 } 2690 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2691 } else { /* only copy matrix values into sbuf_a */ 2692 rptr = sbuf_j; 2693 vals = sbuf_a; 2694 rptr[0] = 0; 2695 for (i=0; 
i<rend-rstart; i++) { 2696 row = i + rstart; 2697 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2698 ncols = nzA + nzB; 2699 cworkB = b->j + b->i[i]; 2700 aworkA = a->a + a->i[i]; 2701 aworkB = b->a + b->i[i]; 2702 lwrite = 0; 2703 for (l=0; l<nzB; l++) { 2704 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2705 } 2706 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2707 for (l=0; l<nzB; l++) { 2708 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2709 } 2710 vals += ncols; 2711 rptr[i+1] = rptr[i] + ncols; 2712 } 2713 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2714 2715 /* send nzlocal to others, and recv other's nzlocal */ 2716 /*--------------------------------------------------*/ 2717 if (reuse == MAT_INITIAL_MATRIX) { 2718 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2719 2720 s_waits2 = s_waits3 + nsends; 2721 s_waits1 = s_waits2 + nsends; 2722 r_waits1 = s_waits1 + nsends; 2723 r_waits2 = r_waits1 + nrecvs; 2724 r_waits3 = r_waits2 + nrecvs; 2725 } else { 2726 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2727 2728 r_waits3 = s_waits3 + nsends; 2729 } 2730 2731 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2732 if (reuse == MAT_INITIAL_MATRIX) { 2733 /* get new tags to keep the communication clean */ 2734 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2735 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2736 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2737 2738 /* post receives of other's nzlocal */ 2739 for (i=0; i<nrecvs; i++) { 2740 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2741 } 2742 /* send nzlocal to others */ 2743 for (i=0; i<nsends; i++) { 2744 sbuf_nz[i] = nzlocal; 2745 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2746 } 2747 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2748 count = nrecvs; 2749 while (count) { 2750 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2751 2752 recv_rank[imdex] = recv_status.MPI_SOURCE; 2753 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2754 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2755 2756 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2757 2758 rbuf_nz[imdex] += i + 2; 2759 2760 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2761 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2762 count--; 2763 } 2764 /* wait on sends of nzlocal */ 2765 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2766 /* send mat->i,j to others, and recv from other's */ 2767 /*------------------------------------------------*/ 2768 for (i=0; i<nsends; i++) { 2769 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2770 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2771 } 2772 /* wait on receives of mat->i,j */ 2773 /*------------------------------*/ 2774 count = nrecvs; 2775 while (count) { 2776 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2777 if (recv_rank[imdex] != 
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2778 count--; 2779 } 2780 /* wait on sends of mat->i,j */ 2781 /*---------------------------*/ 2782 if (nsends) { 2783 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2784 } 2785 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2786 2787 /* post receives, send and receive mat->a */ 2788 /*----------------------------------------*/ 2789 for (imdex=0; imdex<nrecvs; imdex++) { 2790 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2791 } 2792 for (i=0; i<nsends; i++) { 2793 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2794 } 2795 count = nrecvs; 2796 while (count) { 2797 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2798 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2799 count--; 2800 } 2801 if (nsends) { 2802 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2803 } 2804 2805 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2806 2807 /* create redundant matrix */ 2808 /*-------------------------*/ 2809 if (reuse == MAT_INITIAL_MATRIX) { 2810 /* compute rownz_max for preallocation */ 2811 for (imdex=0; imdex<nrecvs; imdex++) { 2812 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2813 rptr = rbuf_j[imdex]; 2814 for (i=0; i<j; i++) { 2815 ncols = rptr[i+1] - rptr[i]; 2816 if (rownz_max < ncols) rownz_max = ncols; 2817 } 2818 } 2819 2820 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2821 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2822 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2823 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2824 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2825 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2826 } else { 2827 C = *matredundant; 2828 } 2829 2830 /* insert local matrix entries */ 2831 rptr = sbuf_j; 2832 cols = sbuf_j + rend-rstart + 1; 2833 vals = sbuf_a; 2834 for (i=0; i<rend-rstart; i++) { 2835 row = i + rstart; 2836 ncols = rptr[i+1] - rptr[i]; 2837 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2838 vals += ncols; 2839 cols += ncols; 2840 } 2841 /* insert received matrix entries */ 2842 for (imdex=0; imdex<nrecvs; imdex++) { 2843 rstart = rowrange[recv_rank[imdex]]; 2844 rend = rowrange[recv_rank[imdex]+1]; 2845 rptr = rbuf_j[imdex]; 2846 cols = rbuf_j[imdex] + rend-rstart + 1; 2847 vals = rbuf_a[imdex]; 2848 for (i=0; i<rend-rstart; i++) { 2849 row = i + rstart; 2850 ncols = rptr[i+1] - rptr[i]; 2851 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2852 vals += ncols; 2853 cols += ncols; 2854 } 2855 } 2856 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2857 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2858 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2859 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2860 if (reuse == MAT_INITIAL_MATRIX) { 2861 PetscContainer container; 2862 *matredundant = C; 2863 /* create a supporting struct and attach it to C for reuse */ 2864 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2865 ierr = 
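  /*
     For MAT_INITIAL_MATRIX the send/receive ranks, buffer sizes and buffers assembled above
     are stashed in the Mat_Redundant struct allocated above, which is attached below to the
     new matrix C as a PetscContainer composed under the name "Mat_Redundant"; C's destroy
     routine is replaced by MatDestroy_MatRedundant() so the buffers are freed together with
     the matrix.  A later call with MAT_REUSE_MATRIX looks the struct up again and only the
     numerical values are re-communicated.  A usage sketch (subcomm and mlocal as created by
     the caller, e.g. through PCREDUNDANT):

        Mat red;
        ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&red);CHKERRQ(ierr);
        ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal,MAT_REUSE_MATRIX,&red);CHKERRQ(ierr);
  */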
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2866 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2867 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2868 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2869 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2870 2871 redund->nzlocal = nzlocal; 2872 redund->nsends = nsends; 2873 redund->nrecvs = nrecvs; 2874 redund->send_rank = send_rank; 2875 redund->recv_rank = recv_rank; 2876 redund->sbuf_nz = sbuf_nz; 2877 redund->rbuf_nz = rbuf_nz; 2878 redund->sbuf_j = sbuf_j; 2879 redund->sbuf_a = sbuf_a; 2880 redund->rbuf_j = rbuf_j; 2881 redund->rbuf_a = rbuf_a; 2882 2883 redund->Destroy = C->ops->destroy; 2884 C->ops->destroy = MatDestroy_MatRedundant; 2885 } 2886 PetscFunctionReturn(0); 2887 } 2888 2889 #undef __FUNCT__ 2890 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2891 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2892 { 2893 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2894 PetscErrorCode ierr; 2895 PetscInt i,*idxb = 0; 2896 PetscScalar *va,*vb; 2897 Vec vtmp; 2898 2899 PetscFunctionBegin; 2900 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2901 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2902 if (idx) { 2903 for (i=0; i<A->rmap->n; i++) { 2904 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2905 } 2906 } 2907 2908 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2909 if (idx) { 2910 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2911 } 2912 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2913 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2914 2915 for (i=0; i<A->rmap->n; i++) { 2916 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2917 va[i] = vb[i]; 2918 if (idx) idx[i] = a->garray[idxb[i]]; 2919 } 2920 } 2921 2922 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2923 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2924 ierr = PetscFree(idxb);CHKERRQ(ierr); 2925 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2926 PetscFunctionReturn(0); 2927 } 2928 2929 #undef __FUNCT__ 2930 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2931 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2932 { 2933 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2934 PetscErrorCode ierr; 2935 PetscInt i,*idxb = 0; 2936 PetscScalar *va,*vb; 2937 Vec vtmp; 2938 2939 PetscFunctionBegin; 2940 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2941 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2942 if (idx) { 2943 for (i=0; i<A->rmap->n; i++) { 2944 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2945 } 2946 } 2947 2948 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2949 if (idx) { 2950 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2951 } 2952 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2953 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2954 2955 for (i=0; i<A->rmap->n; i++) { 2956 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2957 va[i] = vb[i]; 2958 if (idx) idx[i] = a->garray[idxb[i]]; 2959 } 2960 } 2961 2962 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2963 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2964 ierr = PetscFree(idxb);CHKERRQ(ierr); 2965 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2966 PetscFunctionReturn(0); 2967 } 2968 2969 #undef __FUNCT__ 2970 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2971 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2972 { 2973 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)
A->data; 2974 PetscInt n = A->rmap->n; 2975 PetscInt cstart = A->cmap->rstart; 2976 PetscInt *cmap = mat->garray; 2977 PetscInt *diagIdx, *offdiagIdx; 2978 Vec diagV, offdiagV; 2979 PetscScalar *a, *diagA, *offdiagA; 2980 PetscInt r; 2981 PetscErrorCode ierr; 2982 2983 PetscFunctionBegin; 2984 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2985 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2986 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2987 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2988 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2989 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2990 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2991 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2992 for (r = 0; r < n; ++r) { 2993 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2994 a[r] = diagA[r]; 2995 idx[r] = cstart + diagIdx[r]; 2996 } else { 2997 a[r] = offdiagA[r]; 2998 idx[r] = cmap[offdiagIdx[r]]; 2999 } 3000 } 3001 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3002 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3003 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3004 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3005 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3006 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3007 PetscFunctionReturn(0); 3008 } 3009 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3012 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3013 { 3014 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3015 PetscInt n = A->rmap->n; 3016 PetscInt cstart = A->cmap->rstart; 3017 PetscInt *cmap = mat->garray; 3018 PetscInt *diagIdx, *offdiagIdx; 3019 Vec diagV, offdiagV; 3020 PetscScalar *a, *diagA, *offdiagA; 3021 PetscInt r; 3022 PetscErrorCode ierr; 3023 3024 PetscFunctionBegin; 3025 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3026 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3027 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3028 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3029 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3030 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3031 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3032 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3033 for (r = 0; r < n; ++r) { 3034 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3035 a[r] = diagA[r]; 3036 idx[r] = cstart + diagIdx[r]; 3037 } else { 3038 a[r] = offdiagA[r]; 3039 idx[r] = cmap[offdiagIdx[r]]; 3040 } 3041 } 3042 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3043 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3044 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3045 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3046 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3047 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3048 PetscFunctionReturn(0); 3049 } 3050 3051 #undef __FUNCT__ 3052 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3053 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3054 { 3055 PetscErrorCode ierr; 3056 Mat *dummy; 3057 3058 PetscFunctionBegin; 3059 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3060 *newmat = *dummy; 3061 ierr = PetscFree(dummy);CHKERRQ(ierr); 3062 PetscFunctionReturn(0); 3063 } 3064 3065 extern PetscErrorCode
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3066 3067 #undef __FUNCT__ 3068 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3069 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3070 { 3071 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3072 PetscErrorCode ierr; 3073 3074 PetscFunctionBegin; 3075 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3076 PetscFunctionReturn(0); 3077 } 3078 3079 #undef __FUNCT__ 3080 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3081 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3082 { 3083 PetscErrorCode ierr; 3084 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3085 3086 PetscFunctionBegin; 3087 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3088 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3089 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3090 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3091 PetscFunctionReturn(0); 3092 } 3093 3094 /* -------------------------------------------------------------------*/ 3095 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3096 MatGetRow_MPIAIJ, 3097 MatRestoreRow_MPIAIJ, 3098 MatMult_MPIAIJ, 3099 /* 4*/ MatMultAdd_MPIAIJ, 3100 MatMultTranspose_MPIAIJ, 3101 MatMultTransposeAdd_MPIAIJ, 3102 #if defined(PETSC_HAVE_PBGL) 3103 MatSolve_MPIAIJ, 3104 #else 3105 0, 3106 #endif 3107 0, 3108 0, 3109 /*10*/ 0, 3110 0, 3111 0, 3112 MatSOR_MPIAIJ, 3113 MatTranspose_MPIAIJ, 3114 /*15*/ MatGetInfo_MPIAIJ, 3115 MatEqual_MPIAIJ, 3116 MatGetDiagonal_MPIAIJ, 3117 MatDiagonalScale_MPIAIJ, 3118 MatNorm_MPIAIJ, 3119 /*20*/ MatAssemblyBegin_MPIAIJ, 3120 MatAssemblyEnd_MPIAIJ, 3121 MatSetOption_MPIAIJ, 3122 MatZeroEntries_MPIAIJ, 3123 /*24*/ MatZeroRows_MPIAIJ, 3124 0, 3125 #if defined(PETSC_HAVE_PBGL) 3126 0, 3127 #else 3128 0, 3129 #endif 3130 0, 3131 0, 3132 /*29*/ MatSetUp_MPIAIJ, 3133 #if defined(PETSC_HAVE_PBGL) 3134 0, 3135 #else 3136 0, 3137 #endif 3138 0, 3139 0, 3140 0, 3141 /*34*/ MatDuplicate_MPIAIJ, 3142 0, 3143 0, 3144 0, 3145 0, 3146 /*39*/ MatAXPY_MPIAIJ, 3147 MatGetSubMatrices_MPIAIJ, 3148 MatIncreaseOverlap_MPIAIJ, 3149 MatGetValues_MPIAIJ, 3150 MatCopy_MPIAIJ, 3151 /*44*/ MatGetRowMax_MPIAIJ, 3152 MatScale_MPIAIJ, 3153 0, 3154 0, 3155 MatZeroRowsColumns_MPIAIJ, 3156 /*49*/ MatSetRandom_MPIAIJ, 3157 0, 3158 0, 3159 0, 3160 0, 3161 /*54*/ MatFDColoringCreate_MPIAIJ, 3162 0, 3163 MatSetUnfactored_MPIAIJ, 3164 MatPermute_MPIAIJ, 3165 0, 3166 /*59*/ MatGetSubMatrix_MPIAIJ, 3167 MatDestroy_MPIAIJ, 3168 MatView_MPIAIJ, 3169 0, 3170 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3171 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3172 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3173 0, 3174 0, 3175 0, 3176 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3177 MatGetRowMinAbs_MPIAIJ, 3178 0, 3179 MatSetColoring_MPIAIJ, 3180 0, 3181 MatSetValuesAdifor_MPIAIJ, 3182 /*75*/ MatFDColoringApply_AIJ, 3183 0, 3184 0, 3185 0, 3186 MatFindZeroDiagonals_MPIAIJ, 3187 /*80*/ 0, 3188 0, 3189 0, 3190 /*83*/ MatLoad_MPIAIJ, 3191 0, 3192 0, 3193 0, 3194 0, 3195 0, 3196 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3197 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3198 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3199 MatPtAP_MPIAIJ_MPIAIJ, 3200 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3201 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3202 0, 3203 0, 3204 0, 3205 0, 3206 /*99*/ 0, 3207 0, 3208 0, 3209 MatConjugate_MPIAIJ, 3210 0, 3211 /*104*/MatSetValuesRow_MPIAIJ, 3212 MatRealPart_MPIAIJ, 3213 MatImaginaryPart_MPIAIJ, 3214 0, 3215 0, 3216 /*109*/0, 3217 MatGetRedundantMatrix_MPIAIJ, 3218 MatGetRowMin_MPIAIJ, 3219 0, 3220 0, 
3221 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3222 0, 3223 0, 3224 0, 3225 0, 3226 /*119*/0, 3227 0, 3228 0, 3229 0, 3230 MatGetMultiProcBlock_MPIAIJ, 3231 /*124*/MatFindNonzeroRows_MPIAIJ, 3232 MatGetColumnNorms_MPIAIJ, 3233 MatInvertBlockDiagonal_MPIAIJ, 3234 0, 3235 MatGetSubMatricesParallel_MPIAIJ, 3236 /*129*/0, 3237 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3238 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3239 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3240 0, 3241 /*134*/0, 3242 0, 3243 0, 3244 0, 3245 0 3246 }; 3247 3248 /* ----------------------------------------------------------------------------------------*/ 3249 3250 EXTERN_C_BEGIN 3251 #undef __FUNCT__ 3252 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3253 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3254 { 3255 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3256 PetscErrorCode ierr; 3257 3258 PetscFunctionBegin; 3259 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3260 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3261 PetscFunctionReturn(0); 3262 } 3263 EXTERN_C_END 3264 3265 EXTERN_C_BEGIN 3266 #undef __FUNCT__ 3267 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3268 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3269 { 3270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3271 PetscErrorCode ierr; 3272 3273 PetscFunctionBegin; 3274 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3275 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3276 PetscFunctionReturn(0); 3277 } 3278 EXTERN_C_END 3279 3280 EXTERN_C_BEGIN 3281 #undef __FUNCT__ 3282 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3283 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3284 { 3285 Mat_MPIAIJ *b; 3286 PetscErrorCode ierr; 3287 PetscInt i; 3288 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3289 3290 PetscFunctionBegin; 3291 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3292 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3293 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3294 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3295 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3296 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3297 3298 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3299 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3300 if (d_nnz) { 3301 for (i=0; i<B->rmap->n; i++) { 3302 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3303 } 3304 } 3305 if (o_nnz) { 3306 for (i=0; i<B->rmap->n; i++) { 3307 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3308 } 3309 } 3310 b = (Mat_MPIAIJ*)B->data; 3311 3312 if (!B->preallocated) { 3313 /* Explicitly create 2 MATSEQAIJ matrices. 
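       b->A holds the "diagonal" block (local rows by local columns) and b->B the "off-diagonal" block; b->B is sized below with the full global column width (B->cmap->N), and once the matrix is assembled its columns are addressed through the compact garray map used elsewhere in this file (e.g. MatGetColumnNorms_MPIAIJ, MatGetRowMaxAbs_MPIAIJ).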
*/ 3314 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3315 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3316 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3317 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3318 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3319 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3320 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3321 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3322 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3323 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3324 } 3325 3326 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3327 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3328 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3329 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3330 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3331 B->preallocated = PETSC_TRUE; 3332 PetscFunctionReturn(0); 3333 } 3334 EXTERN_C_END 3335 3336 #undef __FUNCT__ 3337 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3338 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3339 { 3340 Mat mat; 3341 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3342 PetscErrorCode ierr; 3343 3344 PetscFunctionBegin; 3345 *newmat = 0; 3346 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3347 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3348 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3349 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3350 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3351 a = (Mat_MPIAIJ*)mat->data; 3352 3353 mat->factortype = matin->factortype; 3354 mat->rmap->bs = matin->rmap->bs; 3355 mat->cmap->bs = matin->cmap->bs; 3356 mat->assembled = PETSC_TRUE; 3357 mat->insertmode = NOT_SET_VALUES; 3358 mat->preallocated = PETSC_TRUE; 3359 3360 a->size = oldmat->size; 3361 a->rank = oldmat->rank; 3362 a->donotstash = oldmat->donotstash; 3363 a->roworiented = oldmat->roworiented; 3364 a->rowindices = 0; 3365 a->rowvalues = 0; 3366 a->getrowactive = PETSC_FALSE; 3367 3368 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3369 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3370 3371 if (oldmat->colmap) { 3372 #if defined(PETSC_USE_CTABLE) 3373 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3374 #else 3375 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3376 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3377 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3378 #endif 3379 } else a->colmap = 0; 3380 if (oldmat->garray) { 3381 PetscInt len; 3382 len = oldmat->B->cmap->n; 3383 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3384 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3385 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3386 } else a->garray = 0; 3387 3388 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3389 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3390 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3391 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3392 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3393 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3394 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3395 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3396 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3397 *newmat = mat; 3398 PetscFunctionReturn(0); 3399 } 3400 3401 3402 3403 #undef __FUNCT__ 3404 #define __FUNCT__ "MatLoad_MPIAIJ" 3405 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3406 { 3407 PetscScalar *vals,*svals; 3408 MPI_Comm comm = ((PetscObject)viewer)->comm; 3409 PetscErrorCode ierr; 3410 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3411 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3412 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3413 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3414 PetscInt cend,cstart,n,*rowners,sizesset=1; 3415 int fd; 3416 PetscInt bs = 1; 3417 3418 PetscFunctionBegin; 3419 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3420 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3421 if (!rank) { 3422 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3423 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3424 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3425 } 3426 3427 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3428 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3429 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3430 3431 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3432 3433 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3434 M = header[1]; N = header[2]; 3435 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3436 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3437 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3438 3439 /* If global sizes are set, check if they are consistent with that given in the file */ 3440 if (sizesset) { 3441 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3442 } 3443 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3444 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3445 3446 /* determine ownership of all (block) rows */ 3447 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3448 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3449 else m = newMat->rmap->n; /* Set by user */ 3450 3451 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3452 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3453 3454 /* First process needs enough room for process with most rows */ 3455 if (!rank) { 3456 mmax = rowners[1]; 3457 for (i=2; i<=size; i++) { 3458 mmax = PetscMax(mmax, 
rowners[i]); 3459 } 3460 } else mmax = m; 3461 3462 rowners[0] = 0; 3463 for (i=2; i<=size; i++) { 3464 rowners[i] += rowners[i-1]; 3465 } 3466 rstart = rowners[rank]; 3467 rend = rowners[rank+1]; 3468 3469 /* distribute row lengths to all processors */ 3470 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3471 if (!rank) { 3472 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3473 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3474 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3475 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3476 for (j=0; j<m; j++) { 3477 procsnz[0] += ourlens[j]; 3478 } 3479 for (i=1; i<size; i++) { 3480 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3481 /* calculate the number of nonzeros on each processor */ 3482 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3483 procsnz[i] += rowlengths[j]; 3484 } 3485 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3486 } 3487 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3488 } else { 3489 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3490 } 3491 3492 if (!rank) { 3493 /* determine max buffer needed and allocate it */ 3494 maxnz = 0; 3495 for (i=0; i<size; i++) { 3496 maxnz = PetscMax(maxnz,procsnz[i]); 3497 } 3498 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3499 3500 /* read in my part of the matrix column indices */ 3501 nz = procsnz[0]; 3502 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3503 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3504 3505 /* read in every one elses and ship off */ 3506 for (i=1; i<size; i++) { 3507 nz = procsnz[i]; 3508 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3509 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3510 } 3511 ierr = PetscFree(cols);CHKERRQ(ierr); 3512 } else { 3513 /* determine buffer space needed for message */ 3514 nz = 0; 3515 for (i=0; i<m; i++) { 3516 nz += ourlens[i]; 3517 } 3518 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3519 3520 /* receive message of column indices*/ 3521 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3522 } 3523 3524 /* determine column ownership if matrix is not square */ 3525 if (N != M) { 3526 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3527 else n = newMat->cmap->n; 3528 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3529 cstart = cend - n; 3530 } else { 3531 cstart = rstart; 3532 cend = rend; 3533 n = cend - cstart; 3534 } 3535 3536 /* loop over local rows, determining number of off diagonal entries */ 3537 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3538 jj = 0; 3539 for (i=0; i<m; i++) { 3540 for (j=0; j<ourlens[i]; j++) { 3541 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3542 jj++; 3543 } 3544 } 3545 3546 for (i=0; i<m; i++) { 3547 ourlens[i] -= offlens[i]; 3548 } 3549 if (!sizesset) { 3550 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3551 } 3552 3553 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3554 3555 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3556 3557 for (i=0; i<m; i++) { 3558 ourlens[i] += offlens[i]; 3559 } 3560 3561 if (!rank) { 3562 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3563 3564 /* read in my part of the matrix numerical values */ 3565 nz = procsnz[0]; 
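 /* rank 0 reads its own block of values first and inserts it below; it then reads each remaining process's block (procsnz[i] scalars) and ships it with MPIULong_Send(), matched by the MPIULong_Recv() in the else branch */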
3566 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3567 3568 /* insert into matrix */ 3569 jj = rstart; 3570 smycols = mycols; 3571 svals = vals; 3572 for (i=0; i<m; i++) { 3573 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3574 smycols += ourlens[i]; 3575 svals += ourlens[i]; 3576 jj++; 3577 } 3578 3579 /* read in other processors and ship out */ 3580 for (i=1; i<size; i++) { 3581 nz = procsnz[i]; 3582 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3583 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3584 } 3585 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3586 } else { 3587 /* receive numeric values */ 3588 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3589 3590 /* receive message of values*/ 3591 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3592 3593 /* insert into matrix */ 3594 jj = rstart; 3595 smycols = mycols; 3596 svals = vals; 3597 for (i=0; i<m; i++) { 3598 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3599 smycols += ourlens[i]; 3600 svals += ourlens[i]; 3601 jj++; 3602 } 3603 } 3604 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3605 ierr = PetscFree(vals);CHKERRQ(ierr); 3606 ierr = PetscFree(mycols);CHKERRQ(ierr); 3607 ierr = PetscFree(rowners);CHKERRQ(ierr); 3608 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3609 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3610 PetscFunctionReturn(0); 3611 } 3612 3613 #undef __FUNCT__ 3614 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3615 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3616 { 3617 PetscErrorCode ierr; 3618 IS iscol_local; 3619 PetscInt csize; 3620 3621 PetscFunctionBegin; 3622 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3623 if (call == MAT_REUSE_MATRIX) { 3624 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3625 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3626 } else { 3627 PetscInt cbs; 3628 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3629 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3630 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3631 } 3632 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3633 if (call == MAT_INITIAL_MATRIX) { 3634 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3635 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3636 } 3637 PetscFunctionReturn(0); 3638 } 3639 3640 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3641 #undef __FUNCT__ 3642 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3643 /* 3644 Not great since it makes two copies of the submatrix, first an SeqAIJ 3645 in local and then by concatenating the local matrices the end result. 3646 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3647 3648 Note: This requires a sequential iscol with all indices. 
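    Note: with MAT_REUSE_MATRIX the sequential submatrix built by the first call is looked up again through the "SubMatrix" object composed on *newmat, so the reused matrix must come from an earlier MAT_INITIAL_MATRIX call with the same size/layout.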
3649 */ 3650 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3651 { 3652 PetscErrorCode ierr; 3653 PetscMPIInt rank,size; 3654 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3655 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3656 PetscBool allcolumns, colflag; 3657 Mat M,Mreuse; 3658 MatScalar *vwork,*aa; 3659 MPI_Comm comm = ((PetscObject)mat)->comm; 3660 Mat_SeqAIJ *aij; 3661 3662 3663 PetscFunctionBegin; 3664 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3665 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3666 3667 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3668 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3669 if (colflag && ncol == mat->cmap->N) { 3670 allcolumns = PETSC_TRUE; 3671 } else { 3672 allcolumns = PETSC_FALSE; 3673 } 3674 if (call == MAT_REUSE_MATRIX) { 3675 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3676 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3677 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3678 } else { 3679 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3680 } 3681 3682 /* 3683 m - number of local rows 3684 n - number of columns (same on all processors) 3685 rstart - first row in new global matrix generated 3686 */ 3687 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3688 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3689 if (call == MAT_INITIAL_MATRIX) { 3690 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3691 ii = aij->i; 3692 jj = aij->j; 3693 3694 /* 3695 Determine the number of non-zeros in the diagonal and off-diagonal 3696 portions of the matrix in order to do correct preallocation 3697 */ 3698 3699 /* first get start and end of "diagonal" columns */ 3700 if (csize == PETSC_DECIDE) { 3701 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3702 if (mglobal == n) { /* square matrix */ 3703 nlocal = m; 3704 } else { 3705 nlocal = n/size + ((n % size) > rank); 3706 } 3707 } else { 3708 nlocal = csize; 3709 } 3710 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3711 rstart = rend - nlocal; 3712 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3713 3714 /* next, compute all the lengths */ 3715 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3716 olens = dlens + m; 3717 for (i=0; i<m; i++) { 3718 jend = ii[i+1] - ii[i]; 3719 olen = 0; 3720 dlen = 0; 3721 for (j=0; j<jend; j++) { 3722 if (*jj < rstart || *jj >= rend) olen++; 3723 else dlen++; 3724 jj++; 3725 } 3726 olens[i] = olen; 3727 dlens[i] = dlen; 3728 } 3729 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3730 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3731 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3732 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3733 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3734 ierr = PetscFree(dlens);CHKERRQ(ierr); 3735 } else { 3736 PetscInt ml,nl; 3737 3738 M = *newmat; 3739 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3740 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3741 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3742 /* 3743 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly, 3744 rather than the slower MatSetValues(). 3745 */ 3746 M->was_assembled = PETSC_TRUE; 3747 M->assembled = PETSC_FALSE; 3748 } 3749 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3750 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3751 ii = aij->i; 3752 jj = aij->j; 3753 aa = aij->a; 3754 for (i=0; i<m; i++) { 3755 row = rstart + i; 3756 nz = ii[i+1] - ii[i]; 3757 cwork = jj; jj += nz; 3758 vwork = aa; aa += nz; 3759 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3760 } 3761 3762 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3763 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3764 *newmat = M; 3765 3766 /* save submatrix used in processor for next request */ 3767 if (call == MAT_INITIAL_MATRIX) { 3768 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3769 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3770 } 3771 PetscFunctionReturn(0); 3772 } 3773 3774 EXTERN_C_BEGIN 3775 #undef __FUNCT__ 3776 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3777 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3778 { 3779 PetscInt m,cstart, cend,j,nnz,i,d; 3780 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3781 const PetscInt *JJ; 3782 PetscScalar *values; 3783 PetscErrorCode ierr; 3784 3785 PetscFunctionBegin; 3786 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3787 3788 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3789 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3790 m = B->rmap->n; 3791 cstart = B->cmap->rstart; 3792 cend = B->cmap->rend; 3793 rstart = B->rmap->rstart; 3794 3795 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3796 3797 #if defined(PETSC_USE_DEBUG) 3798 for (i=0; i<m; i++) { 3799 nnz = Ii[i+1]- Ii[i]; 3800 JJ = J + Ii[i]; 3801 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3802 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3803 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3804 } 3805 #endif 3806 3807 for (i=0; i<m; i++) { 3808 nnz = Ii[i+1]- Ii[i]; 3809 JJ = J + Ii[i]; 3810 nnz_max = PetscMax(nnz_max,nnz); 3811 d = 0; 3812 for (j=0; j<nnz; j++) { 3813 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3814 } 3815 d_nnz[i] = d; 3816 o_nnz[i] = nnz - d; 3817 } 3818 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3819 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3820 3821 if (v) values = (PetscScalar*)v; 3822 else { 3823 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3824 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3825 } 3826 3827 for (i=0; i<m; i++) { 3828 ii = i + rstart; 3829 nnz = Ii[i+1]- Ii[i]; 3830 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3831 } 3832 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3833 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3834 3835 if (!v) { 3836 ierr = PetscFree(values);CHKERRQ(ierr); 3837 } 3838 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3839 PetscFunctionReturn(0); 3840 } 3841 EXTERN_C_END 3842 3843 #undef __FUNCT__ 3844 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3845 /*@ 3846 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3847 (the default parallel PETSc format). 3848 3849 Collective on MPI_Comm 3850 3851 Input Parameters: 3852 + B - the matrix 3853 . i - the indices into j for the start of each local row (starts with zero) 3854 . j - the column indices for each local row (starts with zero) 3855 - v - optional values in the matrix 3856 3857 Level: developer 3858 3859 Notes: 3860 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3861 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3862 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3863 3864 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3865 3866 The format used for the sparse matrix input is equivalent to a 3867 row-major ordering, i.e. for the following matrix, the input data expected is 3868 as shown: 3869 3870 1 0 0 3871 2 0 3 P0 3872 ------- 3873 4 5 6 P1 3874 3875 Process0 [P0]: rows_owned=[0,1] 3876 i = {0,1,3} [size = nrow+1 = 2+1] 3877 j = {0,0,2} [size = nz = 3] 3878 v = {1,2,3} [size = nz = 3] 3879 3880 Process1 [P1]: rows_owned=[2] 3881 i = {0,3} [size = nrow+1 = 1+1] 3882 j = {0,1,2} [size = nz = 3] 3883 v = {4,5,6} [size = nz = 3] 3884 3885 .keywords: matrix, aij, compressed row, sparse, parallel 3886 3887 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3888 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3889 @*/ 3890 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3891 { 3892 PetscErrorCode ierr; 3893 3894 PetscFunctionBegin; 3895 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3896 PetscFunctionReturn(0); 3897 } 3898 3899 #undef __FUNCT__ 3900 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3901 /*@C 3902 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3903 (the default parallel PETSc format). For good matrix assembly performance 3904 the user should preallocate the matrix storage by setting the parameters 3905 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3906 performance can be increased by more than a factor of 50. 3907 3908 Collective on MPI_Comm 3909 3910 Input Parameters: 3911 + B - the matrix 3912 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3913 (same value is used for all local rows) 3914 . d_nnz - array containing the number of nonzeros in the various rows of the 3915 DIAGONAL portion of the local submatrix (possibly different for each row) 3916 or NULL, if d_nz is used to specify the nonzero structure. 3917 The size of this array is equal to the number of local rows, i.e. 'm'.
3918 For matrices that will be factored, you must leave room for (and set) 3919 the diagonal entry even if it is zero. 3920 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3921 submatrix (same value is used for all local rows). 3922 - o_nnz - array containing the number of nonzeros in the various rows of the 3923 OFF-DIAGONAL portion of the local submatrix (possibly different for 3924 each row) or NULL, if o_nz is used to specify the nonzero 3925 structure. The size of this array is equal to the number 3926 of local rows, i.e 'm'. 3927 3928 If the *_nnz parameter is given then the *_nz parameter is ignored 3929 3930 The AIJ format (also called the Yale sparse matrix format or 3931 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3932 storage. The stored row and column indices begin with zero. 3933 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3934 3935 The parallel matrix is partitioned such that the first m0 rows belong to 3936 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3937 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3938 3939 The DIAGONAL portion of the local submatrix of a processor can be defined 3940 as the submatrix which is obtained by extraction the part corresponding to 3941 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3942 first row that belongs to the processor, r2 is the last row belonging to 3943 the this processor, and c1-c2 is range of indices of the local part of a 3944 vector suitable for applying the matrix to. This is an mxn matrix. In the 3945 common case of a square matrix, the row and column ranges are the same and 3946 the DIAGONAL part is also square. The remaining portion of the local 3947 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3948 3949 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3950 3951 You can call MatGetInfo() to get information on how effective the preallocation was; 3952 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3953 You can also run with the option -info and look for messages with the string 3954 malloc in them to see if additional memory allocation was needed. 3955 3956 Example usage: 3957 3958 Consider the following 8x8 matrix with 34 non-zero values, that is 3959 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3960 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3961 as follows: 3962 3963 .vb 3964 1 2 0 | 0 3 0 | 0 4 3965 Proc0 0 5 6 | 7 0 0 | 8 0 3966 9 0 10 | 11 0 0 | 12 0 3967 ------------------------------------- 3968 13 0 14 | 15 16 17 | 0 0 3969 Proc1 0 18 0 | 19 20 21 | 0 0 3970 0 0 0 | 22 23 0 | 24 0 3971 ------------------------------------- 3972 Proc2 25 26 27 | 0 0 28 | 29 0 3973 30 0 0 | 31 32 33 | 0 34 3974 .ve 3975 3976 This can be represented as a collection of submatrices as: 3977 3978 .vb 3979 A B C 3980 D E F 3981 G H I 3982 .ve 3983 3984 Where the submatrices A,B,C are owned by proc0, D,E,F are 3985 owned by proc1, G,H,I are owned by proc2. 3986 3987 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3988 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3989 The 'M','N' parameters are 8,8, and have the same values on all procs. 3990 3991 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3992 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3993 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3994 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3995 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3996 matrix, and [DF] as another SeqAIJ matrix. 3997 3998 When d_nz, o_nz parameters are specified, d_nz storage elements are 3999 allocated for every row of the local diagonal submatrix, and o_nz 4000 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4001 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4002 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4003 In this case, the values of d_nz,o_nz are: 4004 .vb 4005 proc0 : d_nz = 2, o_nz = 2 4006 proc1 : d_nz = 3, o_nz = 2 4007 proc2 : d_nz = 1, o_nz = 4 4008 .ve 4009 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4010 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4011 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4012 34 values. 4013 4014 When d_nnz, o_nnz parameters are specified, the storage is specified 4015 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4016 In the above case the values for d_nnz,o_nnz are: 4017 .vb 4018 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4019 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4020 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4021 .ve 4022 Here the space allocated is the sum of all the above values, i.e. 34, and 4023 hence the pre-allocation is perfect. 4024 4025 Level: intermediate 4026 4027 .keywords: matrix, aij, compressed row, sparse, parallel 4028 4029 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4030 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4031 @*/ 4032 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4033 { 4034 PetscErrorCode ierr; 4035 4036 PetscFunctionBegin; 4037 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4038 PetscValidType(B,1); 4039 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4040 PetscFunctionReturn(0); 4041 } 4042 4043 #undef __FUNCT__ 4044 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4045 /*@ 4046 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 4047 in standard CSR format. 4048 4049 Collective on MPI_Comm 4050 4051 Input Parameters: 4052 + comm - MPI communicator 4053 . m - number of local rows (Cannot be PETSC_DECIDE) 4054 . n - This value should be the same as the local size used in creating the 4055 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4056 calculated if N is given) For square matrices n is almost always m. 4057 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4058 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4059 . i - row indices 4060 . j - column indices 4061 - a - matrix values 4062 4063 Output Parameter: 4064 . mat - the matrix 4065 4066 Level: intermediate 4067 4068 Notes: 4069 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4070 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4071 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
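   As a minimal calling sketch (illustrative only; error checking omitted), process 0 of the small example shown below could create its share of the matrix with:
.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);
.ve
   Here i holds the row offsets for process 0's two local rows, j the global column indices, and a one value per entry of j.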
4072 4073 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4074 4075 The format which is used for the sparse matrix input, is equivalent to a 4076 row-major ordering.. i.e for the following matrix, the input data expected is 4077 as shown: 4078 4079 1 0 0 4080 2 0 3 P0 4081 ------- 4082 4 5 6 P1 4083 4084 Process0 [P0]: rows_owned=[0,1] 4085 i = {0,1,3} [size = nrow+1 = 2+1] 4086 j = {0,0,2} [size = nz = 6] 4087 v = {1,2,3} [size = nz = 6] 4088 4089 Process1 [P1]: rows_owned=[2] 4090 i = {0,3} [size = nrow+1 = 1+1] 4091 j = {0,1,2} [size = nz = 6] 4092 v = {4,5,6} [size = nz = 6] 4093 4094 .keywords: matrix, aij, compressed row, sparse, parallel 4095 4096 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4097 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4098 @*/ 4099 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4100 { 4101 PetscErrorCode ierr; 4102 4103 PetscFunctionBegin; 4104 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4105 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4106 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4107 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4108 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4109 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4110 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4111 PetscFunctionReturn(0); 4112 } 4113 4114 #undef __FUNCT__ 4115 #define __FUNCT__ "MatCreateAIJ" 4116 /*@C 4117 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4118 (the default parallel PETSc format). For good matrix assembly performance 4119 the user should preallocate the matrix storage by setting the parameters 4120 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4121 performance can be increased by more than a factor of 50. 4122 4123 Collective on MPI_Comm 4124 4125 Input Parameters: 4126 + comm - MPI communicator 4127 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4128 This value should be the same as the local size used in creating the 4129 y vector for the matrix-vector product y = Ax. 4130 . n - This value should be the same as the local size used in creating the 4131 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4132 calculated if N is given) For square matrices n is almost always m. 4133 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4134 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4135 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4136 (same value is used for all local rows) 4137 . d_nnz - array containing the number of nonzeros in the various rows of the 4138 DIAGONAL portion of the local submatrix (possibly different for each row) 4139 or NULL, if d_nz is used to specify the nonzero structure. 4140 The size of this array is equal to the number of local rows, i.e 'm'. 4141 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4142 submatrix (same value is used for all local rows). 
4143 - o_nnz - array containing the number of nonzeros in the various rows of the 4144 OFF-DIAGONAL portion of the local submatrix (possibly different for 4145 each row) or NULL, if o_nz is used to specify the nonzero 4146 structure. The size of this array is equal to the number 4147 of local rows, i.e 'm'. 4148 4149 Output Parameter: 4150 . A - the matrix 4151 4152 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4153 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4154 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4155 4156 Notes: 4157 If the *_nnz parameter is given then the *_nz parameter is ignored 4158 4159 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4160 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4161 storage requirements for this matrix. 4162 4163 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4164 processor than it must be used on all processors that share the object for 4165 that argument. 4166 4167 The user MUST specify either the local or global matrix dimensions 4168 (possibly both). 4169 4170 The parallel matrix is partitioned across processors such that the 4171 first m0 rows belong to process 0, the next m1 rows belong to 4172 process 1, the next m2 rows belong to process 2 etc.. where 4173 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4174 values corresponding to [m x N] submatrix. 4175 4176 The columns are logically partitioned with the n0 columns belonging 4177 to 0th partition, the next n1 columns belonging to the next 4178 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4179 4180 The DIAGONAL portion of the local submatrix on any given processor 4181 is the submatrix corresponding to the rows and columns m,n 4182 corresponding to the given processor. i.e diagonal matrix on 4183 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4184 etc. The remaining portion of the local submatrix [m x (N-n)] 4185 constitute the OFF-DIAGONAL portion. The example below better 4186 illustrates this concept. 4187 4188 For a square global matrix we define each processor's diagonal portion 4189 to be its local rows and the corresponding columns (a square submatrix); 4190 each processor's off-diagonal portion encompasses the remainder of the 4191 local matrix (a rectangular submatrix). 4192 4193 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4194 4195 When calling this routine with a single process communicator, a matrix of 4196 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4197 type of communicator, use the construction mechanism: 4198 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4199 4200 By default, this format uses inodes (identical nodes) when possible. 4201 We search for consecutive rows with the same nonzero structure, thereby 4202 reusing matrix information to achieve increased efficiency. 4203 4204 Options Database Keys: 4205 + -mat_no_inode - Do not use inodes 4206 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4207 - -mat_aij_oneindex - Internally use indexing starting at 1 4208 rather than 0. Note that when calling MatSetValues(), 4209 the user still MUST index entries starting at 0! 4210 4211 4212 Example usage: 4213 4214 Consider the following 8x8 matrix with 34 non-zero values, that is 4215 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows, 4216 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4217 as follows: 4218 4219 .vb 4220 1 2 0 | 0 3 0 | 0 4 4221 Proc0 0 5 6 | 7 0 0 | 8 0 4222 9 0 10 | 11 0 0 | 12 0 4223 ------------------------------------- 4224 13 0 14 | 15 16 17 | 0 0 4225 Proc1 0 18 0 | 19 20 21 | 0 0 4226 0 0 0 | 22 23 0 | 24 0 4227 ------------------------------------- 4228 Proc2 25 26 27 | 0 0 28 | 29 0 4229 30 0 0 | 31 32 33 | 0 34 4230 .ve 4231 4232 This can be represented as a collection of submatrices as: 4233 4234 .vb 4235 A B C 4236 D E F 4237 G H I 4238 .ve 4239 4240 Where the submatrices A,B,C are owned by proc0, D,E,F are 4241 owned by proc1, G,H,I are owned by proc2. 4242 4243 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4244 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4245 The 'M','N' parameters are 8,8, and have the same values on all procs. 4246 4247 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4248 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4249 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4250 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4251 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4252 matrix, and [DF] as another SeqAIJ matrix. 4253 4254 When d_nz, o_nz parameters are specified, d_nz storage elements are 4255 allocated for every row of the local diagonal submatrix, and o_nz 4256 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4257 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4258 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4259 In this case, the values of d_nz,o_nz are: 4260 .vb 4261 proc0 : d_nz = 2, o_nz = 2 4262 proc1 : d_nz = 3, o_nz = 2 4263 proc2 : d_nz = 1, o_nz = 4 4264 .ve 4265 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4266 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4267 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4268 34 values. 4269 4270 When d_nnz, o_nnz parameters are specified, the storage is specified 4271 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4272 In the above case the values for d_nnz,o_nnz are: 4273 .vb 4274 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4275 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4276 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4277 .ve 4278 Here the space allocated is the sum of all the above values, i.e. 34, and 4279 hence the pre-allocation is perfect.
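   As a minimal calling sketch (illustrative only; error checking omitted), proc0 of the layout above could create the matrix using the per-row counts just listed:
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   The entries are then inserted with MatSetValues() and the matrix assembled with MatAssemblyBegin()/MatAssemblyEnd(); the other processes make the equivalent call with their own local sizes and nnz arrays.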
4280 4281 Level: intermediate 4282 4283 .keywords: matrix, aij, compressed row, sparse, parallel 4284 4285 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4286 MPIAIJ, MatCreateMPIAIJWithArrays() 4287 @*/ 4288 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4289 { 4290 PetscErrorCode ierr; 4291 PetscMPIInt size; 4292 4293 PetscFunctionBegin; 4294 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4295 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4296 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4297 if (size > 1) { 4298 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4299 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4300 } else { 4301 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4302 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4303 } 4304 PetscFunctionReturn(0); 4305 } 4306 4307 #undef __FUNCT__ 4308 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4309 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4310 { 4311 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4312 4313 PetscFunctionBegin; 4314 *Ad = a->A; 4315 *Ao = a->B; 4316 *colmap = a->garray; 4317 PetscFunctionReturn(0); 4318 } 4319 4320 #undef __FUNCT__ 4321 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4322 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4323 { 4324 PetscErrorCode ierr; 4325 PetscInt i; 4326 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4327 4328 PetscFunctionBegin; 4329 if (coloring->ctype == IS_COLORING_GLOBAL) { 4330 ISColoringValue *allcolors,*colors; 4331 ISColoring ocoloring; 4332 4333 /* set coloring for diagonal portion */ 4334 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4335 4336 /* set coloring for off-diagonal portion */ 4337 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4338 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4339 for (i=0; i<a->B->cmap->n; i++) { 4340 colors[i] = allcolors[a->garray[i]]; 4341 } 4342 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4343 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4344 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4345 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4346 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4347 ISColoringValue *colors; 4348 PetscInt *larray; 4349 ISColoring ocoloring; 4350 4351 /* set coloring for diagonal portion */ 4352 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4353 for (i=0; i<a->A->cmap->n; i++) { 4354 larray[i] = i + A->cmap->rstart; 4355 } 4356 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4357 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4358 for (i=0; i<a->A->cmap->n; i++) { 4359 colors[i] = coloring->colors[larray[i]]; 4360 } 4361 ierr = PetscFree(larray);CHKERRQ(ierr); 4362 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4363 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4364 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4365 4366 /* set coloring for off-diagonal portion */ 4367 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4368 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4369 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4370 for (i=0; i<a->B->cmap->n; i++) { 4371 colors[i] = coloring->colors[larray[i]]; 4372 } 4373 ierr = PetscFree(larray);CHKERRQ(ierr); 4374 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4375 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4376 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4377 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4378 PetscFunctionReturn(0); 4379 } 4380 4381 #undef __FUNCT__ 4382 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4383 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4384 { 4385 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4386 PetscErrorCode ierr; 4387 4388 PetscFunctionBegin; 4389 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4390 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4391 PetscFunctionReturn(0); 4392 } 4393 4394 #undef __FUNCT__ 4395 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4396 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4397 { 4398 PetscErrorCode ierr; 4399 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4400 PetscInt *indx; 4401 4402 PetscFunctionBegin; 4403 /* This routine will ONLY return MPIAIJ type matrix */ 4404 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4405 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4406 if (n == PETSC_DECIDE) { 4407 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4408 } 4409 /* Check sum(n) = N */ 4410 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4411 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4412 4413 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4414 rstart -= m; 4415 4416 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4417 for (i=0; i<m; i++) { 4418 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4419 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4420 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4421 } 4422 4423 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4424 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4425 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4426 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4427 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4428 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4429 PetscFunctionReturn(0); 4430 } 4431 4432 #undef __FUNCT__ 4433 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4434 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4435 { 4436 PetscErrorCode ierr; 4437 PetscInt m,N,i,rstart,nnz,Ii; 4438 PetscInt *indx; 4439 PetscScalar *values; 4440 4441 PetscFunctionBegin; 4442 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4443 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4444 for (i=0; i<m; i++) { 4445 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4446 Ii = i + rstart; 4447 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4448 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4449 } 4450 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4451 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4452 PetscFunctionReturn(0); 4453 } 4454 4455 #undef __FUNCT__ 4456 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4457 /*@ 4458 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4459 matrices from each processor 4460 4461 Collective on MPI_Comm 4462 4463 Input Parameters: 4464 + comm - the communicators the parallel matrix will live on 4465 . inmat - the input sequential matrices 4466 . n - number of local columns (or PETSC_DECIDE) 4467 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4468 4469 Output Parameter: 4470 . outmat - the parallel matrix generated 4471 4472 Level: advanced 4473 4474 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4475 4476 @*/ 4477 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4478 { 4479 PetscErrorCode ierr; 4480 4481 PetscFunctionBegin; 4482 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4483 if (scall == MAT_INITIAL_MATRIX) { 4484 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4485 } 4486 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4487 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4488 PetscFunctionReturn(0); 4489 } 4490 4491 #undef __FUNCT__ 4492 #define __FUNCT__ "MatFileSplit" 4493 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4494 { 4495 PetscErrorCode ierr; 4496 PetscMPIInt rank; 4497 PetscInt m,N,i,rstart,nnz; 4498 size_t len; 4499 const PetscInt *indx; 4500 PetscViewer out; 4501 char *name; 4502 Mat B; 4503 const PetscScalar *values; 4504 4505 PetscFunctionBegin; 4506 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4507 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4508 /* Should this be the type of the diagonal block of A? 
*/ 4509 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4510 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4511 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4512 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4513 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4514 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4515 for (i=0; i<m; i++) { 4516 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4517 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4518 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4519 } 4520 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4521 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4522 4523 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4524 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4525 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4526 sprintf(name,"%s.%d",outfile,rank); 4527 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4528 ierr = PetscFree(name);CHKERRQ(ierr); 4529 ierr = MatView(B,out);CHKERRQ(ierr); 4530 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4531 ierr = MatDestroy(&B);CHKERRQ(ierr); 4532 PetscFunctionReturn(0); 4533 } 4534 4535 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4536 #undef __FUNCT__ 4537 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4538 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4539 { 4540 PetscErrorCode ierr; 4541 Mat_Merge_SeqsToMPI *merge; 4542 PetscContainer container; 4543 4544 PetscFunctionBegin; 4545 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4546 if (container) { 4547 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4557 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4558 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4559 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4560 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4561 ierr = PetscFree(merge);CHKERRQ(ierr); 4562 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4563 } 4564 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4565 PetscFunctionReturn(0); 4566 } 4567 4568 #include <../src/mat/utils/freespace.h> 4569 #include <petscbt.h> 4570 4571 #undef __FUNCT__ 4572 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4573 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4574 { 4575 PetscErrorCode ierr; 4576 MPI_Comm comm=((PetscObject)mpimat)->comm; 4577 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4578 PetscMPIInt size,rank,taga,*len_s; 4579 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4580 PetscInt proc,m; 4581 PetscInt **buf_ri,**buf_rj; 4582 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4583 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4584 MPI_Request *s_waits,*r_waits; 4585 MPI_Status *status; 4586 MatScalar *aa=a->a; 4587 MatScalar **abuf_r,*ba_i; 4588 Mat_Merge_SeqsToMPI *merge; 4589 PetscContainer container; 4590 4591 PetscFunctionBegin; 4592 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4593 4594 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4595 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4596 4597 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4598 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4599 4600 bi = merge->bi; 4601 bj = merge->bj; 4602 buf_ri = merge->buf_ri; 4603 buf_rj = merge->buf_rj; 4604 4605 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4606 owners = merge->rowmap->range; 4607 len_s = merge->len_s; 4608 4609 /* send and recv matrix values */ 4610 /*-----------------------------*/ 4611 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4612 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4613 4614 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4615 for (proc=0,k=0; proc<size; proc++) { 4616 if (!len_s[proc]) continue; 4617 i = owners[proc]; 4618 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4619 k++; 4620 } 4621 4622 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4623 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4624 ierr = PetscFree(status);CHKERRQ(ierr); 4625 4626 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4627 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4628 4629 /* insert mat values of mpimat */ 4630 /*----------------------------*/ 4631 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4632 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4633 4634 for (k=0; k<merge->nrecv; k++) { 4635 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4636 nrows = *(buf_ri_k[k]); 4637 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4638 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4639 } 4640 4641 /* set values of ba */ 4642 m = merge->rowmap->n; 4643 for (i=0; i<m; i++) { 4644 arow = owners[rank] + i; 4645 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4646 bnzi = bi[i+1] - bi[i]; 4647 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4648 4649 /* add local non-zero vals of this proc's seqmat into ba */ 4650 anzi = ai[arow+1] - ai[arow]; 4651 aj = a->j + ai[arow]; 4652 aa = a->a + ai[arow]; 4653 nextaj = 0; 4654 for (j=0; nextaj<anzi; j++) { 4655 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4656 ba_i[j] += aa[nextaj++]; 4657 } 4658 } 4659 4660 /* add received vals into ba */ 4661 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4662 /* i-th row */ 4663 if (i == *nextrow[k]) { 4664 anzi = *(nextai[k]+1) - *nextai[k]; 4665 aj = buf_rj[k] + *(nextai[k]); 4666 aa = abuf_r[k] + *(nextai[k]); 4667 nextaj = 0; 4668 for (j=0; nextaj<anzi; j++) { 4669 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4670 ba_i[j] += aa[nextaj++]; 4671 } 4672 } 4673 nextrow[k]++; nextai[k]++; 4674 } 4675 } 4676 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4677 } 4678 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4679 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4680 4681 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4682 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4683 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4684 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4685 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4686 PetscFunctionReturn(0); 4687 } 4688 4689 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4690 4691 #undef __FUNCT__ 4692 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4693 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4694 { 4695 PetscErrorCode ierr; 4696 Mat B_mpi; 4697 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4698 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4699 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4700 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4701 PetscInt len,proc,*dnz,*onz,bs,cbs; 4702 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4703 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4704 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4705 MPI_Status *status; 4706 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4707 PetscBT lnkbt; 4708 Mat_Merge_SeqsToMPI *merge; 4709 PetscContainer container; 4710 4711 PetscFunctionBegin; 4712 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4713 4714 /* make sure it is a PETSc comm */ 4715 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4716 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4717 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4718 4719 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4720 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4721 4722 /* determine row ownership */ 4723 /*---------------------------------------------------------*/ 4724 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4725 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4726 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4727 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4728 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4729 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4730 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4731 4732 m = merge->rowmap->n; 4733 owners = merge->rowmap->range; 4734 4735 /* determine the number of messages to send, their lengths */ 4736 /*---------------------------------------------------------*/ 4737 len_s = merge->len_s; 4738 4739 len = 0; /* length of buf_si[] */ 4740 merge->nsend = 0; 4741 for (proc=0; proc<size; proc++) { 4742 len_si[proc] = 0; 4743 if (proc == rank) { 4744 len_s[proc] = 0; 4745 } else { 4746 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4747 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4748 } 4749 if (len_s[proc]) { 4750 merge->nsend++; 4751 nrows = 0; 4752 for (i=owners[proc]; i<owners[proc+1]; i++) { 4753 if (ai[i+1] > ai[i]) nrows++; 4754 } 4755 len_si[proc] = 2*(nrows+1); 4756 len += len_si[proc]; 4757 } 4758 } 4759 4760 /* determine the number and length of messages to receive for ij-structure */ 4761 /*-------------------------------------------------------------------------*/ 4762 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4763 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4764 4765 /* post the Irecv of j-structure */ 4766 /*-------------------------------*/ 4767 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4768 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4769 4770 /* post the Isend of j-structure */ 4771 /*--------------------------------*/ 4772 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4773 4774 for (proc=0, k=0; proc<size; proc++) { 4775 if (!len_s[proc]) continue; 4776 i = owners[proc]; 4777 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4778 k++; 4779 } 4780 4781 /* receives and sends of j-structure are complete */ 4782 /*------------------------------------------------*/ 4783 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4784 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4785 4786 /* send and recv i-structure */ 4787 /*---------------------------*/ 4788 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4789 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4790 4791 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4792 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4793 for (proc=0,k=0; proc<size; proc++) { 4794 if (!len_s[proc]) continue; 4795 /* form outgoing message for i-structure: 4796 buf_si[0]: nrows to be sent 4797 [1:nrows]: row index (global) 4798 [nrows+1:2*nrows+1]: i-structure index 4799 */ 4800 /*-------------------------------------------*/ 4801 nrows = len_si[proc]/2 - 1; 4802 buf_si_i = buf_si + nrows+1; 4803 buf_si[0] = nrows; 4804 buf_si_i[0] = 0; 4805 nrows = 0; 4806 for (i=owners[proc]; i<owners[proc+1]; i++) { 4807 anzi = ai[i+1] - ai[i]; 4808 if (anzi) { 4809 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4810 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4811 nrows++; 4812 } 4813 } 4814 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4815 k++; 4816 buf_si += len_si[proc]; 4817 } 4818 4819 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4820 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4821 4822 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4823 for (i=0; i<merge->nrecv; i++) { 4824 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4825 } 4826 4827 ierr = PetscFree(len_si);CHKERRQ(ierr); 4828 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4829 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4830 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4831 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4832 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4833 ierr = PetscFree(status);CHKERRQ(ierr); 4834 4835 /* compute a local seq matrix in each processor */ 4836 /*----------------------------------------------*/ 4837 /* allocate bi array and free space for accumulating nonzero column info */ 4838 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4839 bi[0] = 0; 4840 4841 /* create and initialize a linked list */ 4842 nlnk = N+1; 4843 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4844 4845 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4846 len = ai[owners[rank+1]] - ai[owners[rank]]; 4847 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4848 4849 current_space = free_space; 4850 4851 /* determine symbolic info for each local row */ 4852 ierr = 
PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4853 4854 for (k=0; k<merge->nrecv; k++) { 4855 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4856 nrows = *buf_ri_k[k]; 4857 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4858 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4859 } 4860 4861 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4862 len = 0; 4863 for (i=0; i<m; i++) { 4864 bnzi = 0; 4865 /* add local non-zero cols of this proc's seqmat into lnk */ 4866 arow = owners[rank] + i; 4867 anzi = ai[arow+1] - ai[arow]; 4868 aj = a->j + ai[arow]; 4869 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4870 bnzi += nlnk; 4871 /* add received col data into lnk */ 4872 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4873 if (i == *nextrow[k]) { /* i-th row */ 4874 anzi = *(nextai[k]+1) - *nextai[k]; 4875 aj = buf_rj[k] + *nextai[k]; 4876 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4877 bnzi += nlnk; 4878 nextrow[k]++; nextai[k]++; 4879 } 4880 } 4881 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4882 4883 /* if free space is not available, make more free space */ 4884 if (current_space->local_remaining<bnzi) { 4885 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4886 nspacedouble++; 4887 } 4888 /* copy data into free space, then initialize lnk */ 4889 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4890 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4891 4892 current_space->array += bnzi; 4893 current_space->local_used += bnzi; 4894 current_space->local_remaining -= bnzi; 4895 4896 bi[i+1] = bi[i] + bnzi; 4897 } 4898 4899 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4900 4901 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4902 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4903 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4904 4905 /* create symbolic parallel matrix B_mpi */ 4906 /*---------------------------------------*/ 4907 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4908 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4909 if (n==PETSC_DECIDE) { 4910 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4911 } else { 4912 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4913 } 4914 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4915 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4916 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4917 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4918 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4919 4920 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4921 B_mpi->assembled = PETSC_FALSE; 4922 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4923 merge->bi = bi; 4924 merge->bj = bj; 4925 merge->buf_ri = buf_ri; 4926 merge->buf_rj = buf_rj; 4927 merge->coi = NULL; 4928 merge->coj = NULL; 4929 merge->owners_co = NULL; 4930 4931 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4932 4933 /* attach the supporting struct to B_mpi for reuse */ 4934 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4935 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4936 ierr 
= PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4937 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4938 *mpimat = B_mpi; 4939 4940 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4941 PetscFunctionReturn(0); 4942 } 4943 4944 #undef __FUNCT__ 4945 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4946 /*@C 4947 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4948 matrices from each processor 4949 4950 Collective on MPI_Comm 4951 4952 Input Parameters: 4953 + comm - the communicators the parallel matrix will live on 4954 . seqmat - the input sequential matrices 4955 . m - number of local rows (or PETSC_DECIDE) 4956 . n - number of local columns (or PETSC_DECIDE) 4957 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4958 4959 Output Parameter: 4960 . mpimat - the parallel matrix generated 4961 4962 Level: advanced 4963 4964 Notes: 4965 The dimensions of the sequential matrix in each processor MUST be the same. 4966 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4967 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4968 @*/ 4969 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4970 { 4971 PetscErrorCode ierr; 4972 PetscMPIInt size; 4973 4974 PetscFunctionBegin; 4975 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4976 if (size == 1) { 4977 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4978 if (scall == MAT_INITIAL_MATRIX) { 4979 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4980 } else { 4981 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4982 } 4983 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4984 PetscFunctionReturn(0); 4985 } 4986 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4987 if (scall == MAT_INITIAL_MATRIX) { 4988 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4989 } 4990 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4991 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4992 PetscFunctionReturn(0); 4993 } 4994 4995 #undef __FUNCT__ 4996 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4997 /*@ 4998 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4999 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5000 with MatGetSize() 5001 5002 Not Collective 5003 5004 Input Parameters: 5005 + A - the matrix 5006 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5007 5008 Output Parameter: 5009 . 
A_loc - the local sequential matrix generated 5010 5011 Level: developer 5012 5013 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5014 5015 @*/ 5016 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5017 { 5018 PetscErrorCode ierr; 5019 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5020 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5021 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5022 MatScalar *aa=a->a,*ba=b->a,*cam; 5023 PetscScalar *ca; 5024 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5025 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5026 PetscBool match; 5027 5028 PetscFunctionBegin; 5029 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5030 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5031 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5032 if (scall == MAT_INITIAL_MATRIX) { 5033 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5034 ci[0] = 0; 5035 for (i=0; i<am; i++) { 5036 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5037 } 5038 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5039 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5040 k = 0; 5041 for (i=0; i<am; i++) { 5042 ncols_o = bi[i+1] - bi[i]; 5043 ncols_d = ai[i+1] - ai[i]; 5044 /* off-diagonal portion of A */ 5045 for (jo=0; jo<ncols_o; jo++) { 5046 col = cmap[*bj]; 5047 if (col >= cstart) break; 5048 cj[k] = col; bj++; 5049 ca[k++] = *ba++; 5050 } 5051 /* diagonal portion of A */ 5052 for (j=0; j<ncols_d; j++) { 5053 cj[k] = cstart + *aj++; 5054 ca[k++] = *aa++; 5055 } 5056 /* off-diagonal portion of A */ 5057 for (j=jo; j<ncols_o; j++) { 5058 cj[k] = cmap[*bj++]; 5059 ca[k++] = *ba++; 5060 } 5061 } 5062 /* put together the new matrix */ 5063 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5064 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5065 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5066 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5067 mat->free_a = PETSC_TRUE; 5068 mat->free_ij = PETSC_TRUE; 5069 mat->nonew = 0; 5070 } else if (scall == MAT_REUSE_MATRIX) { 5071 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5072 ci = mat->i; cj = mat->j; cam = mat->a; 5073 for (i=0; i<am; i++) { 5074 /* off-diagonal portion of A */ 5075 ncols_o = bi[i+1] - bi[i]; 5076 for (jo=0; jo<ncols_o; jo++) { 5077 col = cmap[*bj]; 5078 if (col >= cstart) break; 5079 *cam++ = *ba++; bj++; 5080 } 5081 /* diagonal portion of A */ 5082 ncols_d = ai[i+1] - ai[i]; 5083 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5084 /* off-diagonal portion of A */ 5085 for (j=jo; j<ncols_o; j++) { 5086 *cam++ = *ba++; bj++; 5087 } 5088 } 5089 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5090 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5091 PetscFunctionReturn(0); 5092 } 5093 5094 #undef __FUNCT__ 5095 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5096 /*@C 5097 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5098 5099 Not Collective 5100 5101 Input Parameters: 5102 + A - the matrix 5103 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5104 - row, col - index sets of rows and columns to extract (or NULL) 5105 5106 Output Parameter: 5107 . 
A_loc - the local sequential matrix generated 5108 5109 Level: developer 5110 5111 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5112 5113 @*/ 5114 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5115 { 5116 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5117 PetscErrorCode ierr; 5118 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5119 IS isrowa,iscola; 5120 Mat *aloc; 5121 PetscBool match; 5122 5123 PetscFunctionBegin; 5124 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5125 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5126 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5127 if (!row) { 5128 start = A->rmap->rstart; end = A->rmap->rend; 5129 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5130 } else { 5131 isrowa = *row; 5132 } 5133 if (!col) { 5134 start = A->cmap->rstart; 5135 cmap = a->garray; 5136 nzA = a->A->cmap->n; 5137 nzB = a->B->cmap->n; 5138 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5139 ncols = 0; 5140 for (i=0; i<nzB; i++) { 5141 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5142 else break; 5143 } 5144 imark = i; 5145 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5146 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5147 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5148 } else { 5149 iscola = *col; 5150 } 5151 if (scall != MAT_INITIAL_MATRIX) { 5152 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5153 aloc[0] = *A_loc; 5154 } 5155 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5156 *A_loc = aloc[0]; 5157 ierr = PetscFree(aloc);CHKERRQ(ierr); 5158 if (!row) { 5159 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5160 } 5161 if (!col) { 5162 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5163 } 5164 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5165 PetscFunctionReturn(0); 5166 } 5167 5168 #undef __FUNCT__ 5169 #define __FUNCT__ "MatGetBrowsOfAcols" 5170 /*@C 5171 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5172 5173 Collective on Mat 5174 5175 Input Parameters: 5176 + A,B - the matrices in mpiaij format 5177 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5178 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5179 5180 Output Parameter: 5181 + rowb, colb - index sets of rows and columns of B to extract 5182 - B_seq - the sequential matrix generated 5183 5184 Level: developer 5185 5186 @*/ 5187 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5188 { 5189 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5190 PetscErrorCode ierr; 5191 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5192 IS isrowb,iscolb; 5193 Mat *bseq=NULL; 5194 5195 PetscFunctionBegin; 5196 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5197 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5198 } 5199 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5200 5201 if (scall == MAT_INITIAL_MATRIX) { 5202 start = A->cmap->rstart; 5203 cmap = a->garray; 5204 nzA = a->A->cmap->n; 5205 nzB = a->B->cmap->n; 5206 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5207 ncols = 0; 5208 for (i=0; i<nzB; i++) { /* row < local row index */ 5209 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5210 else break; 5211 } 5212 imark = i; 5213 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5214 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5215 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5216 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5217 } else { 5218 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5219 isrowb = *rowb; iscolb = *colb; 5220 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5221 bseq[0] = *B_seq; 5222 } 5223 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5224 *B_seq = bseq[0]; 5225 ierr = PetscFree(bseq);CHKERRQ(ierr); 5226 if (!rowb) { 5227 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5228 } else { 5229 *rowb = isrowb; 5230 } 5231 if (!colb) { 5232 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5233 } else { 5234 *colb = iscolb; 5235 } 5236 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5237 PetscFunctionReturn(0); 5238 } 5239 5240 #undef __FUNCT__ 5241 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5242 /* 5243 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5244 of the OFF-DIAGONAL portion of local A 5245 5246 Collective on Mat 5247 5248 Input Parameters: 5249 + A,B - the matrices in mpiaij format 5250 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5251 5252 Output Parameter: 5253 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5254 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5255 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5256 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5257 5258 Level: developer 5259 5260 */ 5261 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5262 { 5263 VecScatter_MPI_General *gen_to,*gen_from; 5264 PetscErrorCode ierr; 5265 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5266 Mat_SeqAIJ *b_oth; 5267 VecScatter ctx =a->Mvctx; 5268 MPI_Comm comm=((PetscObject)ctx)->comm; 5269 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5270 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5271 PetscScalar *rvalues,*svalues; 5272 MatScalar *b_otha,*bufa,*bufA; 5273 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5274 MPI_Request *rwaits = NULL,*swaits = NULL; 5275 MPI_Status *sstatus,rstatus; 5276 PetscMPIInt jj; 5277 PetscInt *cols,sbs,rbs; 5278 PetscScalar *vals; 5279 5280 PetscFunctionBegin; 5281 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5282 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5283 } 5284 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5285 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5286 5287 gen_to = (VecScatter_MPI_General*)ctx->todata; 5288 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5289 rvalues = gen_from->values; /* holds the length of receiving row */ 5290 svalues = gen_to->values; /* holds the length of sending row */ 5291 nrecvs = gen_from->n; 5292 nsends = gen_to->n; 5293 5294 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5295 srow = gen_to->indices; /* local row index to be sent */ 5296 sstarts = gen_to->starts; 5297 sprocs = gen_to->procs; 5298 sstatus = gen_to->sstatus; 5299 sbs = gen_to->bs; 5300 rstarts = gen_from->starts; 5301 rprocs = gen_from->procs; 5302 rbs = gen_from->bs; 5303 5304 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5305 if (scall == MAT_INITIAL_MATRIX) { 5306 /* i-array */ 5307 /*---------*/ 5308 /* post receives */ 5309 for (i=0; i<nrecvs; i++) { 5310 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5311 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5312 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5313 } 5314 5315 /* pack the outgoing message */ 5316 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5317 5318 sstartsj[0] = 0; 5319 rstartsj[0] = 0; 5320 len = 0; /* total length of j or a array to be sent */ 5321 k = 0; 5322 for (i=0; i<nsends; i++) { 5323 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5324 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5325 for (j=0; j<nrows; j++) { 5326 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5327 for (l=0; l<sbs; l++) { 5328 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5329 5330 rowlen[j*sbs+l] = ncols; 5331 5332 len += ncols; 5333 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5334 } 5335 k++; 5336 } 5337 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5338 5339 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5340 } 5341 /* recvs and 
sends of i-array are completed */ 5342 i = nrecvs; 5343 while (i--) { 5344 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5345 } 5346 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5347 5348 /* allocate buffers for sending j and a arrays */ 5349 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5350 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5351 5352 /* create i-array of B_oth */ 5353 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5354 5355 b_othi[0] = 0; 5356 len = 0; /* total length of j or a array to be received */ 5357 k = 0; 5358 for (i=0; i<nrecvs; i++) { 5359 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5360 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5361 for (j=0; j<nrows; j++) { 5362 b_othi[k+1] = b_othi[k] + rowlen[j]; 5363 len += rowlen[j]; k++; 5364 } 5365 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5366 } 5367 5368 /* allocate space for j and a arrrays of B_oth */ 5369 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5370 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5371 5372 /* j-array */ 5373 /*---------*/ 5374 /* post receives of j-array */ 5375 for (i=0; i<nrecvs; i++) { 5376 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5377 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5378 } 5379 5380 /* pack the outgoing message j-array */ 5381 k = 0; 5382 for (i=0; i<nsends; i++) { 5383 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5384 bufJ = bufj+sstartsj[i]; 5385 for (j=0; j<nrows; j++) { 5386 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5387 for (ll=0; ll<sbs; ll++) { 5388 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5389 for (l=0; l<ncols; l++) { 5390 *bufJ++ = cols[l]; 5391 } 5392 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5393 } 5394 } 5395 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5396 } 5397 5398 /* recvs and sends of j-array are completed */ 5399 i = nrecvs; 5400 while (i--) { 5401 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5402 } 5403 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5404 } else if (scall == MAT_REUSE_MATRIX) { 5405 sstartsj = *startsj_s; 5406 rstartsj = *startsj_r; 5407 bufa = *bufa_ptr; 5408 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5409 b_otha = b_oth->a; 5410 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5411 5412 /* a-array */ 5413 /*---------*/ 5414 /* post receives of a-array */ 5415 for (i=0; i<nrecvs; i++) { 5416 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5417 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5418 } 5419 5420 /* pack the outgoing message a-array */ 5421 k = 0; 5422 for (i=0; i<nsends; i++) { 5423 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5424 bufA = bufa+sstartsj[i]; 5425 for (j=0; j<nrows; j++) { 5426 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5427 for (ll=0; ll<sbs; ll++) { 5428 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5429 for (l=0; l<ncols; l++) { 5430 *bufA++ = vals[l]; 5431 } 5432 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5433 } 
5434 } 5435 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5436 } 5437 /* recvs and sends of a-array are completed */ 5438 i = nrecvs; 5439 while (i--) { 5440 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5441 } 5442 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5443 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5444 5445 if (scall == MAT_INITIAL_MATRIX) { 5446 /* put together the new matrix */ 5447 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5448 5449 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5450 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5451 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5452 b_oth->free_a = PETSC_TRUE; 5453 b_oth->free_ij = PETSC_TRUE; 5454 b_oth->nonew = 0; 5455 5456 ierr = PetscFree(bufj);CHKERRQ(ierr); 5457 if (!startsj_s || !bufa_ptr) { 5458 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5459 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5460 } else { 5461 *startsj_s = sstartsj; 5462 *startsj_r = rstartsj; 5463 *bufa_ptr = bufa; 5464 } 5465 } 5466 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5467 PetscFunctionReturn(0); 5468 } 5469 5470 #undef __FUNCT__ 5471 #define __FUNCT__ "MatGetCommunicationStructs" 5472 /*@C 5473 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5474 5475 Not Collective 5476 5477 Input Parameters: 5478 . A - The matrix in mpiaij format 5479 5480 Output Parameter: 5481 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5482 . colmap - A map from global column index to local index into lvec 5483 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5484 5485 Level: developer 5486 5487 @*/ 5488 #if defined(PETSC_USE_CTABLE) 5489 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5490 #else 5491 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5492 #endif 5493 { 5494 Mat_MPIAIJ *a; 5495 5496 PetscFunctionBegin; 5497 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5498 PetscValidPointer(lvec, 2); 5499 PetscValidPointer(colmap, 3); 5500 PetscValidPointer(multScatter, 4); 5501 a = (Mat_MPIAIJ*) A->data; 5502 if (lvec) *lvec = a->lvec; 5503 if (colmap) *colmap = a->colmap; 5504 if (multScatter) *multScatter = a->Mvctx; 5505 PetscFunctionReturn(0); 5506 } 5507 5508 EXTERN_C_BEGIN 5509 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5510 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5511 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5512 EXTERN_C_END 5513 5514 #undef __FUNCT__ 5515 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5516 /* 5517 Computes (B'*A')' since computing B*A directly is untenable 5518 5519 n p p 5520 ( ) ( ) ( ) 5521 m ( A ) * n ( B ) = m ( C ) 5522 ( ) ( ) ( ) 5523 5524 */ 5525 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5526 { 5527 PetscErrorCode ierr; 5528 Mat At,Bt,Ct; 5529 5530 PetscFunctionBegin; 5531 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5532 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5533 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5534 
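  /* Ct = B^T * A^T = (A*B)^T; the MatTranspose() below, reusing the preallocated C, recovers the desired C = A*B */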
ierr = MatDestroy(&At);CHKERRQ(ierr); 5535 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5536 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5537 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5538 PetscFunctionReturn(0); 5539 } 5540 5541 #undef __FUNCT__ 5542 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5543 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5544 { 5545 PetscErrorCode ierr; 5546 PetscInt m=A->rmap->n,n=B->cmap->n; 5547 Mat Cmat; 5548 5549 PetscFunctionBegin; 5550 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5551 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5552 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5553 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5554 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5555 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5556 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5557 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5558 5559 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5560 5561 *C = Cmat; 5562 PetscFunctionReturn(0); 5563 } 5564 5565 /* ----------------------------------------------------------------*/ 5566 #undef __FUNCT__ 5567 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5568 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5569 { 5570 PetscErrorCode ierr; 5571 5572 PetscFunctionBegin; 5573 if (scall == MAT_INITIAL_MATRIX) { 5574 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5575 } 5576 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5577 PetscFunctionReturn(0); 5578 } 5579 5580 EXTERN_C_BEGIN 5581 #if defined(PETSC_HAVE_MUMPS) 5582 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5583 #endif 5584 #if defined(PETSC_HAVE_PASTIX) 5585 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5586 #endif 5587 #if defined(PETSC_HAVE_SUPERLU_DIST) 5588 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5589 #endif 5590 #if defined(PETSC_HAVE_CLIQUE) 5591 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5592 #endif 5593 EXTERN_C_END 5594 5595 /*MC 5596 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5597 5598 Options Database Keys: 5599 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5600 5601 Level: beginner 5602 5603 .seealso: MatCreateAIJ() 5604 M*/ 5605 5606 EXTERN_C_BEGIN 5607 #undef __FUNCT__ 5608 #define __FUNCT__ "MatCreate_MPIAIJ" 5609 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5610 { 5611 Mat_MPIAIJ *b; 5612 PetscErrorCode ierr; 5613 PetscMPIInt size; 5614 5615 PetscFunctionBegin; 5616 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5617 5618 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5619 B->data = (void*)b; 5620 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5621 B->assembled = PETSC_FALSE; 5622 B->insertmode = NOT_SET_VALUES; 5623 b->size = size; 5624 5625 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5626 5627 /* build cache for off array entries formed */ 5628 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5629 5630 b->donotstash = PETSC_FALSE; 5631 b->colmap = 0; 5632 b->garray = 0; 5633 b->roworiented = PETSC_TRUE; 5634 5635 /* stuff used for matrix vector multiply */ 5636 b->lvec = NULL; 5637 b->Mvctx = NULL; 5638 5639 /* stuff for MatGetRow() */ 5640 b->rowindices = 0; 5641 b->rowvalues = 0; 5642 b->getrowactive = PETSC_FALSE; 5643 5644 /* flexible pointer used in CUSP/CUSPARSE classes */ 5645 b->spptr = NULL; 5646 5647 #if defined(PETSC_HAVE_MUMPS) 5648 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5649 "MatGetFactor_aij_mumps", 5650 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5651 #endif 5652 #if defined(PETSC_HAVE_PASTIX) 5653 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5654 "MatGetFactor_mpiaij_pastix", 5655 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5656 #endif 5657 #if defined(PETSC_HAVE_SUPERLU_DIST) 5658 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5659 "MatGetFactor_mpiaij_superlu_dist", 5660 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5661 #endif 5662 #if defined(PETSC_HAVE_CLIQUE) 5663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5664 "MatGetFactor_aij_clique", 5665 MatGetFactor_aij_clique);CHKERRQ(ierr); 5666 #endif 5667 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5668 "MatStoreValues_MPIAIJ", 5669 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5670 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5671 "MatRetrieveValues_MPIAIJ", 5672 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5673 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5674 "MatGetDiagonalBlock_MPIAIJ", 5675 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5677 "MatIsTranspose_MPIAIJ", 5678 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5679 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5680 "MatMPIAIJSetPreallocation_MPIAIJ", 5681 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5682 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5683 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5684 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5685 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5686 "MatDiagonalScaleLocal_MPIAIJ", 5687 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5688 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5689 
"MatConvert_MPIAIJ_MPIAIJPERM", 5690 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5691 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5692 "MatConvert_MPIAIJ_MPIAIJCRL", 5693 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5694 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5695 "MatConvert_MPIAIJ_MPISBAIJ", 5696 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5697 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5698 "MatMatMult_MPIDense_MPIAIJ", 5699 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5700 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5701 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5702 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5703 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5704 "MatMatMultNumeric_MPIDense_MPIAIJ", 5705 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5706 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5707 PetscFunctionReturn(0); 5708 } 5709 EXTERN_C_END 5710 5711 #undef __FUNCT__ 5712 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5713 /*@ 5714 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5715 and "off-diagonal" part of the matrix in CSR format. 5716 5717 Collective on MPI_Comm 5718 5719 Input Parameters: 5720 + comm - MPI communicator 5721 . m - number of local rows (Cannot be PETSC_DECIDE) 5722 . n - This value should be the same as the local size used in creating the 5723 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5724 calculated if N is given) For square matrices n is almost always m. 5725 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5726 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5727 . i - row indices for "diagonal" portion of matrix 5728 . j - column indices 5729 . a - matrix values 5730 . oi - row indices for "off-diagonal" portion of matrix 5731 . oj - column indices 5732 - oa - matrix values 5733 5734 Output Parameter: 5735 . mat - the matrix 5736 5737 Level: advanced 5738 5739 Notes: 5740 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5741 must free the arrays once the matrix has been destroyed and not before. 5742 5743 The i and j indices are 0 based 5744 5745 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5746 5747 This sets local rows and cannot be used to set off-processor values. 5748 5749 You cannot later use MatSetValues() to change values in this matrix. 
5750 5751 .keywords: matrix, aij, compressed row, sparse, parallel 5752 5753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5754 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5755 @*/ 5756 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5757 { 5758 PetscErrorCode ierr; 5759 Mat_MPIAIJ *maij; 5760 5761 PetscFunctionBegin; 5762 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5763 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5764 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5765 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5766 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5767 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5768 maij = (Mat_MPIAIJ*) (*mat)->data; 5769 5770 maij->donotstash = PETSC_TRUE; 5771 (*mat)->preallocated = PETSC_TRUE; 5772 5773 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5774 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5775 5776 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5777 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5778 5779 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5780 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5781 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5782 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5783 5784 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5785 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5786 PetscFunctionReturn(0); 5787 } 5788 5789 /* 5790 Special version for direct calls from Fortran 5791 */ 5792 #include <petsc-private/fortranimpl.h> 5793 5794 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5795 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5796 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5797 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5798 #endif 5799 5800 /* Change these macros so can be used in void function */ 5801 #undef CHKERRQ 5802 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5803 #undef SETERRQ2 5804 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5805 #undef SETERRQ3 5806 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5807 #undef SETERRQ 5808 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5809 5810 EXTERN_C_BEGIN 5811 #undef __FUNCT__ 5812 #define __FUNCT__ "matsetvaluesmpiaij_" 5813 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5814 { 5815 Mat mat = *mmat; 5816 PetscInt m = *mm, n = *mn; 5817 InsertMode addv = *maddv; 5818 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5819 PetscScalar value; 5820 PetscErrorCode ierr; 5821 5822 MatCheckPreallocated(mat,1); 5823 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5824 5825 #if defined(PETSC_USE_DEBUG) 5826 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5827 #endif 5828 { 5829 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5830 PetscInt cstart = mat->cmap->rstart,cend = 
mat->cmap->rend,row,col; 5831 PetscBool roworiented = aij->roworiented; 5832 5833 /* Some Variables required in the macro */ 5834 Mat A = aij->A; 5835 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5836 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5837 MatScalar *aa = a->a; 5838 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5839 Mat B = aij->B; 5840 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5841 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5842 MatScalar *ba = b->a; 5843 5844 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5845 PetscInt nonew = a->nonew; 5846 MatScalar *ap1,*ap2; 5847 5848 PetscFunctionBegin; 5849 for (i=0; i<m; i++) { 5850 if (im[i] < 0) continue; 5851 #if defined(PETSC_USE_DEBUG) 5852 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5853 #endif 5854 if (im[i] >= rstart && im[i] < rend) { 5855 row = im[i] - rstart; 5856 lastcol1 = -1; 5857 rp1 = aj + ai[row]; 5858 ap1 = aa + ai[row]; 5859 rmax1 = aimax[row]; 5860 nrow1 = ailen[row]; 5861 low1 = 0; 5862 high1 = nrow1; 5863 lastcol2 = -1; 5864 rp2 = bj + bi[row]; 5865 ap2 = ba + bi[row]; 5866 rmax2 = bimax[row]; 5867 nrow2 = bilen[row]; 5868 low2 = 0; 5869 high2 = nrow2; 5870 5871 for (j=0; j<n; j++) { 5872 if (roworiented) value = v[i*n+j]; 5873 else value = v[i+j*m]; 5874 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5875 if (in[j] >= cstart && in[j] < cend) { 5876 col = in[j] - cstart; 5877 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5878 } else if (in[j] < 0) continue; 5879 #if defined(PETSC_USE_DEBUG) 5880 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5881 #endif 5882 else { 5883 if (mat->was_assembled) { 5884 if (!aij->colmap) { 5885 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5886 } 5887 #if defined(PETSC_USE_CTABLE) 5888 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5889 col--; 5890 #else 5891 col = aij->colmap[in[j]] - 1; 5892 #endif 5893 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5894 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5895 col = in[j]; 5896 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5897 B = aij->B; 5898 b = (Mat_SeqAIJ*)B->data; 5899 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5900 rp2 = bj + bi[row]; 5901 ap2 = ba + bi[row]; 5902 rmax2 = bimax[row]; 5903 nrow2 = bilen[row]; 5904 low2 = 0; 5905 high2 = nrow2; 5906 bm = aij->B->rmap->n; 5907 ba = b->a; 5908 } 5909 } else col = in[j]; 5910 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5911 } 5912 } 5913 } else if (!aij->donotstash) { 5914 if (roworiented) { 5915 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5916 } else { 5917 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5918 } 5919 } 5920 } 5921 } 5922 PetscFunctionReturnVoid(); 5923 } 5924 EXTERN_C_END 5925 5926