#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ
*aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
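    Process 0 holds the entire input SeqAIJ matrix and ships each of the other processes the row lengths,
    column indices, and numerical values for its block of rows.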
179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = 
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the 
local number in the off-diagonal part of the local
    storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it, it is not scalable (each process
    stores an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 
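            /* lazily build the map from global column numbers to local columns of the off-diagonal part B; see MatCreateColmap_MPIAIJ_Private() */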
546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ 
"MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. 
*/ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 716 717 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 718 719 aij->rowvalues = 0; 720 721 /* used by MatAXPY() */ 722 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 723 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 724 725 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 726 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 733 { 734 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 739 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatZeroRows_MPIAIJ" 745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 746 { 747 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 750 PetscInt i,*owners = A->rmap->range; 751 PetscInt *nprocs,j,idx,nsends,row; 752 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 753 PetscInt *rvalues,count,base,slen,*source; 754 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 755 MPI_Comm comm; 756 MPI_Request *send_waits,*recv_waits; 757 MPI_Status recv_status,*send_status; 758 const PetscScalar *xx; 759 PetscScalar *bb; 760 #if defined(PETSC_DEBUG) 761 PetscBool found = PETSC_FALSE; 762 #endif 763 764 PetscFunctionBegin; 765 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 766 /* first count number of contributors to each processor */ 767 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 768 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 769 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 770 j = 0; 771 for (i=0; i<N; i++) { 772 if (lastidx > (idx = rows[i])) j = 0; 773 lastidx = idx; 774 for (; j<size; j++) { 775 if (idx >= owners[j] && idx < owners[j+1]) { 776 nprocs[2*j]++; 777 nprocs[2*j+1] = 1; 778 owner[i] = j; 779 #if defined(PETSC_DEBUG) 780 found = PETSC_TRUE; 781 #endif 782 break; 783 } 784 } 785 #if defined(PETSC_DEBUG) 786 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 787 found = PETSC_FALSE; 788 #endif 789 } 790 nsends = 0; 791 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 792 793 if (A->nooffproczerorows) { 794 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off 
process zero row"); 795 nrecvs = nsends; 796 nmax = N; 797 } else { 798 /* inform other processors of number of messages and max length*/ 799 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 800 } 801 802 /* post receives: */ 803 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 804 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 805 for (i=0; i<nrecvs; i++) { 806 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 807 } 808 809 /* do sends: 810 1) starts[i] gives the starting index in svalues for stuff going to 811 the ith processor 812 */ 813 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 814 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 815 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 816 817 starts[0] = 0; 818 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 819 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 820 821 starts[0] = 0; 822 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 823 count = 0; 824 for (i=0; i<size; i++) { 825 if (nprocs[2*i+1]) { 826 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 827 } 828 } 829 ierr = PetscFree(starts);CHKERRQ(ierr); 830 831 base = owners[rank]; 832 833 /* wait on receives */ 834 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 835 count = nrecvs; slen = 0; 836 while (count) { 837 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 838 /* unpack receives into our local space */ 839 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 840 841 source[imdex] = recv_status.MPI_SOURCE; 842 lens[imdex] = n; 843 slen += n; 844 count--; 845 } 846 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 847 848 /* move the data into the send scatter */ 849 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 850 count = 0; 851 for (i=0; i<nrecvs; i++) { 852 values = rvalues + i*nmax; 853 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 854 } 855 ierr = PetscFree(rvalues);CHKERRQ(ierr); 856 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 857 ierr = PetscFree(owner);CHKERRQ(ierr); 858 ierr = PetscFree(nprocs);CHKERRQ(ierr); 859 860 /* fix right hand side if needed */ 861 if (x && b) { 862 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 864 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 865 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 866 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 867 } 868 /* 869 Zero the required rows. If the "diagonal block" of the matrix 870 is square and the user wishes to set the diagonal we use separate 871 code so that MatSetValues() is not called for each diagonal allocating 872 new memory, thus calling lots of mallocs and slowing things down. 
873 874 */ 875 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 876 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 877 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 878 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 879 } else if (diag != 0.0) { 880 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 881 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 882 for (i = 0; i < slen; i++) { 883 row = lrows[i] + rstart; 884 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 885 } 886 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 } else { 889 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(lrows);CHKERRQ(ierr); 892 893 /* wait on sends */ 894 if (nsends) { 895 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 896 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 897 ierr = PetscFree(send_status);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(send_waits);CHKERRQ(ierr); 900 ierr = PetscFree(svalues);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 911 PetscInt i,*owners = A->rmap->range; 912 PetscInt *nprocs,j,idx,nsends; 913 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 914 PetscInt *rvalues,count,base,slen,*source; 915 PetscInt *lens,*lrows,*values,m; 916 MPI_Comm comm; 917 MPI_Request *send_waits,*recv_waits; 918 MPI_Status recv_status,*send_status; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 #if defined(PETSC_DEBUG) 926 PetscBool found = PETSC_FALSE; 927 #endif 928 929 PetscFunctionBegin; 930 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 931 /* first count number of contributors to each processor */ 932 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 933 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 934 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 935 j = 0; 936 for (i=0; i<N; i++) { 937 if (lastidx > (idx = rows[i])) j = 0; 938 lastidx = idx; 939 for (; j<size; j++) { 940 if (idx >= owners[j] && idx < owners[j+1]) { 941 nprocs[2*j]++; 942 nprocs[2*j+1] = 1; 943 owner[i] = j; 944 #if defined(PETSC_DEBUG) 945 found = PETSC_TRUE; 946 #endif 947 break; 948 } 949 } 950 #if defined(PETSC_DEBUG) 951 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 952 found = PETSC_FALSE; 953 #endif 954 } 955 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 956 957 /* inform other processors of number of messages and max length*/ 958 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 959 960 /* post receives: */ 961 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 962 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 963 for (i=0; i<nrecvs; i++) { 964 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 965 } 966 967 /* do sends: 968 1) starts[i] gives the starting index in svalues for stuff going to 969 the ith processor 970 */ 971 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 972 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 973 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 974 975 starts[0] = 0; 976 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 977 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 978 979 starts[0] = 0; 980 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 981 count = 0; 982 for (i=0; i<size; i++) { 983 if (nprocs[2*i+1]) { 984 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscFree(starts);CHKERRQ(ierr); 988 989 base = owners[rank]; 990 991 /* wait on receives */ 992 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 993 count = nrecvs; slen = 0; 994 while (count) { 995 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 996 /* unpack receives into our local space */ 997 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 998 999 source[imdex] = recv_status.MPI_SOURCE; 1000 lens[imdex] = n; 1001 slen += n; 1002 count--; 1003 } 1004 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1005 1006 /* move the data into the send scatter */ 1007 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1008 count = 0; 1009 for (i=0; i<nrecvs; i++) { 1010 values = rvalues + i*nmax; 1011 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1012 } 1013 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1014 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1015 ierr = PetscFree(owner);CHKERRQ(ierr); 1016 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1017 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1018 1019 /* zero diagonal part of matrix */ 1020 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1021 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x) { 1032 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1035 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1036 } 1037 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1038 1039 /* remove zeroed rows of off diagonal matrix */ 1040 ii = aij->i; 1041 for (i=0; i<slen; i++) { 1042 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1043 } 1044 1045 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1046 if (aij->compressedrow.use) { 1047 m = aij->compressedrow.nrows; 1048 ii = 
aij->compressedrow.i; 1049 ridx = aij->compressedrow.rindex; 1050 for (i=0; i<m; i++) { 1051 n = ii[i+1] - ii[i]; 1052 aj = aij->j + ii[i]; 1053 aa = aij->a + ii[i]; 1054 1055 for (j=0; j<n; j++) { 1056 if (PetscAbsScalar(mask[*aj])) { 1057 if (b) bb[*ridx] -= *aa*xx[*aj]; 1058 *aa = 0.0; 1059 } 1060 aa++; 1061 aj++; 1062 } 1063 ridx++; 1064 } 1065 } else { /* do not use compressed row format */ 1066 m = l->B->rmap->n; 1067 for (i=0; i<m; i++) { 1068 n = ii[i+1] - ii[i]; 1069 aj = aij->j + ii[i]; 1070 aa = aij->a + ii[i]; 1071 for (j=0; j<n; j++) { 1072 if (PetscAbsScalar(mask[*aj])) { 1073 if (b) bb[i] -= *aa*xx[*aj]; 1074 *aa = 0.0; 1075 } 1076 aa++; 1077 aj++; 1078 } 1079 } 1080 } 1081 if (x) { 1082 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1083 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1084 } 1085 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1086 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1087 ierr = PetscFree(lrows);CHKERRQ(ierr); 1088 1089 /* wait on sends */ 1090 if (nsends) { 1091 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1092 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1093 ierr = PetscFree(send_status);CHKERRQ(ierr); 1094 } 1095 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1096 ierr = PetscFree(svalues);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send 
it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1178 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1179 { 1180 MPI_Comm comm; 1181 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1182 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1183 IS Me,Notme; 1184 PetscErrorCode ierr; 1185 PetscInt M,N,first,last,*notme,i; 1186 PetscMPIInt size; 1187 1188 PetscFunctionBegin; 1189 /* Easy test: symmetric diagonal block */ 1190 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1191 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1192 if (!*f) PetscFunctionReturn(0); 1193 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1195 if (size == 1) PetscFunctionReturn(0); 1196 1197 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1198 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1199 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1200 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1201 for (i=0; i<first; i++) notme[i] = i; 1202 for (i=last; i<M; i++) notme[i-last+first] = i; 1203 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1204 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1205 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1206 Aoff = Aoffs[0]; 1207 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1208 Boff = Boffs[0]; 1209 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1214 ierr = PetscFree(notme);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 /* do nondiagonal part */ 1227 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1228 /* send it on its way */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 /* do local part */ 1231 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1232 /* receive remote parts */ 1233 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* 1238 This only works correctly for 
square matrices where the subblock A->A is the 1239 diagonal block 1240 */ 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1244 { 1245 PetscErrorCode ierr; 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1250 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1251 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatScale_MPIAIJ" 1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1258 { 1259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1264 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_MPIAIJ" 1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1278 #endif 1279 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1283 #if defined(PETSC_USE_CTABLE) 1284 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1285 #else 1286 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1287 #endif 1288 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1289 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1290 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1291 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1292 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1313 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1314 PetscErrorCode ierr; 1315 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1316 int fd; 1317 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1318 PetscInt nzmax,*column_indices,j,k,col,*garray = 
aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
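      /* receive the nonzero count from process i, then its column indices, and append them to the binary file */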
1388 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1389 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1390 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1392 } 1393 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1394 } else { 1395 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1396 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1399 } 1400 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1401 1402 /* load up the local column values */ 1403 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1404 cnt = 0; 1405 for (i=0; i<mat->rmap->n; i++) { 1406 for (j=B->i[i]; j<B->i[i+1]; j++) { 1407 if (garray[B->j[j]] > cstart) break; 1408 column_values[cnt++] = B->a[j]; 1409 } 1410 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1411 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1412 } 1413 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1414 1415 /* store the column values to the file */ 1416 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1417 if (!rank) { 1418 MPI_Status status; 1419 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1420 for (i=1; i<size; i++) { 1421 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1422 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1423 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1424 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1426 } 1427 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1430 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(column_values);CHKERRQ(ierr); 1435 1436 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1437 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #include <petscdraw.h> 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1445 { 1446 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1447 PetscErrorCode ierr; 1448 PetscMPIInt rank = aij->rank,size = aij->size; 1449 PetscBool isdraw,iascii,isbinary; 1450 PetscViewer sviewer; 1451 PetscViewerFormat format; 1452 1453 PetscFunctionBegin; 1454 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. 
*/ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 
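/* Eisenstat variant: the local backward sweep above produced a first iterate in xx; next scatter xx so the off-diagonal block B can be applied, build the modified right-hand side bb1 from the scaled diagonal and B*lvec contributions, then finish with a local forward sweep whose result xx1 is added back into xx */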
1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 
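/* collect the statistics of the diagonal (A) and off-diagonal (B) blocks, add them, and for MAT_GLOBAL_MAX or MAT_GLOBAL_SUM reduce the five local quantities over the matrix's communicator */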
info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = 
amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation. 
*/ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map->hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2450 ierr = PetscContainerSetPointer(c, lgraph_p); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2452 ierr = PetscContainerDestroy(&c); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2511 PetscInt nzlocal,nsends,nrecvs; 2512 PetscMPIInt *send_rank,*recv_rank; 2513 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2514 PetscScalar *sbuf_a,**rbuf_a; 2515 PetscErrorCode (*Destroy)(Mat); 2516 } Mat_Redundant; 2517 2518 #undef __FUNCT__ 2519 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2520 
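/* destructor for the Mat_Redundant scratch data defined above; it releases the send/receive rank lists and the index and value buffers that MatGetRedundantMatrix_MPIAIJ() keeps attached to the redundant matrix for MAT_REUSE_MATRIX calls */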
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2521 { 2522 PetscErrorCode ierr; 2523 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2524 PetscInt i; 2525 2526 PetscFunctionBegin; 2527 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2528 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2529 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2530 for (i=0; i<redund->nrecvs; i++) { 2531 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2532 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2533 } 2534 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2535 ierr = PetscFree(redund);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 #undef __FUNCT__ 2540 #define __FUNCT__ "MatDestroy_MatRedundant" 2541 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2542 { 2543 PetscErrorCode ierr; 2544 PetscContainer container; 2545 Mat_Redundant *redund=NULL; 2546 2547 PetscFunctionBegin; 2548 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2549 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2550 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2551 2552 A->ops->destroy = redund->Destroy; 2553 2554 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2555 if (A->ops->destroy) { 2556 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2563 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2564 { 2565 PetscMPIInt rank,size; 2566 MPI_Comm comm; 2567 PetscErrorCode ierr; 2568 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2569 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2570 PetscInt *rowrange = mat->rmap->range; 2571 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2572 Mat A = aij->A,B=aij->B,C=*matredundant; 2573 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2574 PetscScalar *sbuf_a; 2575 PetscInt nzlocal=a->nz+b->nz; 2576 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2577 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2578 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2579 MatScalar *aworkA,*aworkB; 2580 PetscScalar *vals; 2581 PetscMPIInt tag1,tag2,tag3,imdex; 2582 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2583 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2584 MPI_Status recv_status,*send_status; 2585 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2586 PetscInt **rbuf_j=NULL; 2587 PetscScalar **rbuf_a=NULL; 2588 Mat_Redundant *redund =NULL; 2589 PetscContainer container; 2590 2591 PetscFunctionBegin; 2592 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2593 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2594 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2595 2596 if (reuse == MAT_REUSE_MATRIX) { 2597 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2598 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2599 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2600 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2601 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2602 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2603 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2604 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2605 2606 nsends = redund->nsends; 2607 nrecvs = redund->nrecvs; 2608 send_rank = redund->send_rank; 2609 recv_rank = redund->recv_rank; 2610 sbuf_nz = redund->sbuf_nz; 2611 rbuf_nz = redund->rbuf_nz; 2612 sbuf_j = redund->sbuf_j; 2613 sbuf_a = redund->sbuf_a; 2614 rbuf_j = redund->rbuf_j; 2615 rbuf_a = redund->rbuf_a; 2616 } 2617 2618 if (reuse == MAT_INITIAL_MATRIX) { 2619 PetscMPIInt subrank,subsize; 2620 PetscInt nleftover,np_subcomm; 2621 /* get the destination processors' id send_rank, nsends and nrecvs */ 2622 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2623 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2624 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2625 2626 np_subcomm = size/nsubcomm; 2627 nleftover = size - nsubcomm*np_subcomm; 2628 2629 nsends = 0; nrecvs = 0; 2630 for (i=0; i<size; i++) { /* i=rank*/ 2631 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2632 send_rank[nsends] = i; nsends++; 2633 recv_rank[nrecvs++] = i; 2634 } 2635 } 2636 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2637 i = size-nleftover-1; 2638 j = 0; 2639 while (j < nsubcomm - nleftover) { 2640 send_rank[nsends++] = i; 2641 i--; j++; 2642 } 2643 } 2644 2645 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2646 for (i=0; i<nleftover; i++) { 2647 recv_rank[nrecvs++] = size-nleftover+i; 2648 } 2649 } 2650 2651 /* allocate sbuf_j, sbuf_a */ 2652 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2653 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2654 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2655 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2656 2657 /* copy mat's local entries into the buffers */ 2658 if (reuse == MAT_INITIAL_MATRIX) { 2659 rownz_max = 0; 2660 rptr = sbuf_j; 2661 cols = sbuf_j + rend-rstart + 1; 2662 vals = sbuf_a; 2663 rptr[0] = 0; 2664 for (i=0; i<rend-rstart; i++) { 2665 row = i + rstart; 2666 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2667 ncols = nzA + nzB; 2668 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2669 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2670 /* load the column indices for this row into cols */ 2671 lwrite = 0; 2672 for (l=0; l<nzB; l++) { 2673 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2674 vals[lwrite] = aworkB[l]; 2675 cols[lwrite++] = ctmp; 2676 } 2677 } 2678 for (l=0; l<nzA; l++) { 2679 vals[lwrite] = aworkA[l]; 2680 cols[lwrite++] = cstart + cworkA[l]; 2681 } 2682 for (l=0; l<nzB; l++) { 2683 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2684 vals[lwrite] = aworkB[l]; 2685 cols[lwrite++] = ctmp; 2686 } 2687 } 2688 vals += ncols; 2689 cols += ncols; 2690 rptr[i+1] = rptr[i] + ncols; 2691 if (rownz_max < ncols) rownz_max = ncols; 2692 } 2693 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2694 } else { /* only copy matrix values into sbuf_a */ 2695 rptr = sbuf_j; 2696 vals = sbuf_a; 2697 rptr[0] = 0; 2698 for (i=0; 
i<rend-rstart; i++) { 2699 row = i + rstart; 2700 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2701 ncols = nzA + nzB; 2702 cworkB = b->j + b->i[i]; 2703 aworkA = a->a + a->i[i]; 2704 aworkB = b->a + b->i[i]; 2705 lwrite = 0; 2706 for (l=0; l<nzB; l++) { 2707 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2708 } 2709 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2710 for (l=0; l<nzB; l++) { 2711 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2712 } 2713 vals += ncols; 2714 rptr[i+1] = rptr[i] + ncols; 2715 } 2716 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2717 2718 /* send nzlocal to others, and recv other's nzlocal */ 2719 /*--------------------------------------------------*/ 2720 if (reuse == MAT_INITIAL_MATRIX) { 2721 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2722 2723 s_waits2 = s_waits3 + nsends; 2724 s_waits1 = s_waits2 + nsends; 2725 r_waits1 = s_waits1 + nsends; 2726 r_waits2 = r_waits1 + nrecvs; 2727 r_waits3 = r_waits2 + nrecvs; 2728 } else { 2729 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2730 2731 r_waits3 = s_waits3 + nsends; 2732 } 2733 2734 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2735 if (reuse == MAT_INITIAL_MATRIX) { 2736 /* get new tags to keep the communication clean */ 2737 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2738 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2739 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2740 2741 /* post receives of other's nzlocal */ 2742 for (i=0; i<nrecvs; i++) { 2743 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2744 } 2745 /* send nzlocal to others */ 2746 for (i=0; i<nsends; i++) { 2747 sbuf_nz[i] = nzlocal; 2748 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2749 } 2750 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2751 count = nrecvs; 2752 while (count) { 2753 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2754 2755 recv_rank[imdex] = recv_status.MPI_SOURCE; 2756 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2757 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2758 2759 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2760 2761 rbuf_nz[imdex] += i + 2; 2762 2763 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2764 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2765 count--; 2766 } 2767 /* wait on sends of nzlocal */ 2768 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2769 /* send mat->i,j to others, and recv from other's */ 2770 /*------------------------------------------------*/ 2771 for (i=0; i<nsends; i++) { 2772 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2773 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2774 } 2775 /* wait on receives of mat->i,j */ 2776 /*------------------------------*/ 2777 count = nrecvs; 2778 while (count) { 2779 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2780 if (recv_rank[imdex] != 
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2781 count--; 2782 } 2783 /* wait on sends of mat->i,j */ 2784 /*---------------------------*/ 2785 if (nsends) { 2786 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2787 } 2788 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2789 2790 /* post receives, send and receive mat->a */ 2791 /*----------------------------------------*/ 2792 for (imdex=0; imdex<nrecvs; imdex++) { 2793 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2794 } 2795 for (i=0; i<nsends; i++) { 2796 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2797 } 2798 count = nrecvs; 2799 while (count) { 2800 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2801 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2802 count--; 2803 } 2804 if (nsends) { 2805 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2806 } 2807 2808 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2809 2810 /* create redundant matrix */ 2811 /*-------------------------*/ 2812 if (reuse == MAT_INITIAL_MATRIX) { 2813 /* compute rownz_max for preallocation */ 2814 for (imdex=0; imdex<nrecvs; imdex++) { 2815 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2816 rptr = rbuf_j[imdex]; 2817 for (i=0; i<j; i++) { 2818 ncols = rptr[i+1] - rptr[i]; 2819 if (rownz_max < ncols) rownz_max = ncols; 2820 } 2821 } 2822 2823 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2824 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2825 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2826 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2827 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2828 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2829 } else { 2830 C = *matredundant; 2831 } 2832 2833 /* insert local matrix entries */ 2834 rptr = sbuf_j; 2835 cols = sbuf_j + rend-rstart + 1; 2836 vals = sbuf_a; 2837 for (i=0; i<rend-rstart; i++) { 2838 row = i + rstart; 2839 ncols = rptr[i+1] - rptr[i]; 2840 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2841 vals += ncols; 2842 cols += ncols; 2843 } 2844 /* insert received matrix entries */ 2845 for (imdex=0; imdex<nrecvs; imdex++) { 2846 rstart = rowrange[recv_rank[imdex]]; 2847 rend = rowrange[recv_rank[imdex]+1]; 2848 rptr = rbuf_j[imdex]; 2849 cols = rbuf_j[imdex] + rend-rstart + 1; 2850 vals = rbuf_a[imdex]; 2851 for (i=0; i<rend-rstart; i++) { 2852 row = i + rstart; 2853 ncols = rptr[i+1] - rptr[i]; 2854 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2855 vals += ncols; 2856 cols += ncols; 2857 } 2858 } 2859 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2860 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2862 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2863 if (reuse == MAT_INITIAL_MATRIX) { 2864 PetscContainer container; 2865 *matredundant = C; 2866 /* create a supporting struct and attach it to C for reuse */ 2867 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2868 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2869 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2870 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2871 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2872 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2873 2874 redund->nzlocal = nzlocal; 2875 redund->nsends = nsends; 2876 redund->nrecvs = nrecvs; 2877 redund->send_rank = send_rank; 2878 redund->recv_rank = recv_rank; 2879 redund->sbuf_nz = sbuf_nz; 2880 redund->rbuf_nz = rbuf_nz; 2881 redund->sbuf_j = sbuf_j; 2882 redund->sbuf_a = sbuf_a; 2883 redund->rbuf_j = rbuf_j; 2884 redund->rbuf_a = rbuf_a; 2885 2886 redund->Destroy = C->ops->destroy; 2887 C->ops->destroy = MatDestroy_MatRedundant; 2888 } 2889 PetscFunctionReturn(0); 2890 } 2891 2892 #undef __FUNCT__ 2893 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2894 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2895 { 2896 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2897 PetscErrorCode ierr; 2898 PetscInt i,*idxb = 0; 2899 PetscScalar *va,*vb; 2900 Vec vtmp; 2901 2902 PetscFunctionBegin; 2903 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2904 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2905 if (idx) { 2906 for (i=0; i<A->rmap->n; i++) { 2907 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2908 } 2909 } 2910 2911 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2912 if (idx) { 2913 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2914 } 2915 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2916 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2917 2918 for (i=0; i<A->rmap->n; i++) { 2919 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2920 va[i] = vb[i]; 2921 if (idx) idx[i] = a->garray[idxb[i]]; 2922 } 2923 } 2924 2925 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2926 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2927 ierr = PetscFree(idxb);CHKERRQ(ierr); 2928 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2929 PetscFunctionReturn(0); 2930 } 2931 2932 #undef __FUNCT__ 2933 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2934 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2935 { 2936 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2937 PetscErrorCode ierr; 2938 PetscInt i,*idxb = 0; 2939 PetscScalar *va,*vb; 2940 Vec vtmp; 2941 2942 PetscFunctionBegin; 2943 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2944 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2945 if (idx) { 2946 for (i=0; i<A->cmap->n; i++) { 2947 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2948 } 2949 } 2950 2951 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2952 if (idx) { 2953 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2954 } 2955 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2956 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2957 2958 for (i=0; i<A->rmap->n; i++) { 2959 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2960 va[i] = vb[i]; 2961 if (idx) idx[i] = a->garray[idxb[i]]; 2962 } 2963 } 2964 2965 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2966 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2967 ierr = PetscFree(idxb);CHKERRQ(ierr); 2968 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2969 PetscFunctionReturn(0); 2970 } 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2974 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2975 { 2976 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data; 2977 PetscInt n = A->rmap->n; 2978 PetscInt cstart = A->cmap->rstart; 2979 PetscInt *cmap = mat->garray; 2980 PetscInt *diagIdx, *offdiagIdx; 2981 Vec diagV, offdiagV; 2982 PetscScalar *a, *diagA, *offdiagA; 2983 PetscInt r; 2984 PetscErrorCode ierr; 2985 2986 PetscFunctionBegin; 2987 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2988 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2989 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2990 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2991 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2992 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2993 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2994 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2995 for (r = 0; r < n; ++r) { 2996 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2997 a[r] = diagA[r]; 2998 idx[r] = cstart + diagIdx[r]; 2999 } else { 3000 a[r] = offdiagA[r]; 3001 idx[r] = cmap[offdiagIdx[r]]; 3002 } 3003 } 3004 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3005 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3006 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3007 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3008 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3009 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3010 PetscFunctionReturn(0); 3011 } 3012 3013 #undef __FUNCT__ 3014 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3015 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3016 { 3017 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3018 PetscInt n = A->rmap->n; 3019 PetscInt cstart = A->cmap->rstart; 3020 PetscInt *cmap = mat->garray; 3021 PetscInt *diagIdx, *offdiagIdx; 3022 Vec diagV, offdiagV; 3023 PetscScalar *a, *diagA, *offdiagA; 3024 PetscInt r; 3025 PetscErrorCode ierr; 3026 3027 PetscFunctionBegin; 3028 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3029 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3030 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3031 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3032 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3033 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3034 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3035 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3036 for (r = 0; r < n; ++r) { 3037 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3038 a[r] = diagA[r]; 3039 idx[r] = cstart + diagIdx[r]; 3040 } else { 3041 a[r] = offdiagA[r]; 3042 idx[r] = cmap[offdiagIdx[r]]; 3043 } 3044 } 3045 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3046 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3047 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3048 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3049 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3050 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3051 PetscFunctionReturn(0); 3052 } 3053 3054 #undef __FUNCT__ 3055 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3056 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3057 { 3058 PetscErrorCode ierr; 3059 Mat *dummy; 3060 3061 PetscFunctionBegin; 3062 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3063 *newmat = *dummy; 3064 ierr = PetscFree(dummy);CHKERRQ(ierr); 3065 PetscFunctionReturn(0); 3066 } 3067 3068 extern PetscErrorCode
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3069 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3072 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3073 { 3074 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3075 PetscErrorCode ierr; 3076 3077 PetscFunctionBegin; 3078 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3079 PetscFunctionReturn(0); 3080 } 3081 3082 #undef __FUNCT__ 3083 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3084 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3085 { 3086 PetscErrorCode ierr; 3087 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3088 3089 PetscFunctionBegin; 3090 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3091 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3092 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3093 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3094 PetscFunctionReturn(0); 3095 } 3096 3097 /* -------------------------------------------------------------------*/ 3098 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3099 MatGetRow_MPIAIJ, 3100 MatRestoreRow_MPIAIJ, 3101 MatMult_MPIAIJ, 3102 /* 4*/ MatMultAdd_MPIAIJ, 3103 MatMultTranspose_MPIAIJ, 3104 MatMultTransposeAdd_MPIAIJ, 3105 #if defined(PETSC_HAVE_PBGL) 3106 MatSolve_MPIAIJ, 3107 #else 3108 0, 3109 #endif 3110 0, 3111 0, 3112 /*10*/ 0, 3113 0, 3114 0, 3115 MatSOR_MPIAIJ, 3116 MatTranspose_MPIAIJ, 3117 /*15*/ MatGetInfo_MPIAIJ, 3118 MatEqual_MPIAIJ, 3119 MatGetDiagonal_MPIAIJ, 3120 MatDiagonalScale_MPIAIJ, 3121 MatNorm_MPIAIJ, 3122 /*20*/ MatAssemblyBegin_MPIAIJ, 3123 MatAssemblyEnd_MPIAIJ, 3124 MatSetOption_MPIAIJ, 3125 MatZeroEntries_MPIAIJ, 3126 /*24*/ MatZeroRows_MPIAIJ, 3127 0, 3128 #if defined(PETSC_HAVE_PBGL) 3129 0, 3130 #else 3131 0, 3132 #endif 3133 0, 3134 0, 3135 /*29*/ MatSetUp_MPIAIJ, 3136 #if defined(PETSC_HAVE_PBGL) 3137 0, 3138 #else 3139 0, 3140 #endif 3141 0, 3142 0, 3143 0, 3144 /*34*/ MatDuplicate_MPIAIJ, 3145 0, 3146 0, 3147 0, 3148 0, 3149 /*39*/ MatAXPY_MPIAIJ, 3150 MatGetSubMatrices_MPIAIJ, 3151 MatIncreaseOverlap_MPIAIJ, 3152 MatGetValues_MPIAIJ, 3153 MatCopy_MPIAIJ, 3154 /*44*/ MatGetRowMax_MPIAIJ, 3155 MatScale_MPIAIJ, 3156 0, 3157 0, 3158 MatZeroRowsColumns_MPIAIJ, 3159 /*49*/ MatSetRandom_MPIAIJ, 3160 0, 3161 0, 3162 0, 3163 0, 3164 /*54*/ MatFDColoringCreate_MPIAIJ, 3165 0, 3166 MatSetUnfactored_MPIAIJ, 3167 MatPermute_MPIAIJ, 3168 0, 3169 /*59*/ MatGetSubMatrix_MPIAIJ, 3170 MatDestroy_MPIAIJ, 3171 MatView_MPIAIJ, 3172 0, 3173 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3174 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3175 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3176 0, 3177 0, 3178 0, 3179 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3180 MatGetRowMinAbs_MPIAIJ, 3181 0, 3182 MatSetColoring_MPIAIJ, 3183 0, 3184 MatSetValuesAdifor_MPIAIJ, 3185 /*75*/ MatFDColoringApply_AIJ, 3186 0, 3187 0, 3188 0, 3189 MatFindZeroDiagonals_MPIAIJ, 3190 /*80*/ 0, 3191 0, 3192 0, 3193 /*83*/ MatLoad_MPIAIJ, 3194 0, 3195 0, 3196 0, 3197 0, 3198 0, 3199 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3200 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3201 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3202 MatPtAP_MPIAIJ_MPIAIJ, 3203 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3204 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3205 0, 3206 0, 3207 0, 3208 0, 3209 /*99*/ 0, 3210 0, 3211 0, 3212 MatConjugate_MPIAIJ, 3213 0, 3214 /*104*/MatSetValuesRow_MPIAIJ, 3215 MatRealPart_MPIAIJ, 3216 MatImaginaryPart_MPIAIJ, 3217 0, 3218 0, 3219 /*109*/0, 3220 MatGetRedundantMatrix_MPIAIJ, 3221 MatGetRowMin_MPIAIJ, 3222 0, 3223 0, 
3224 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3225 0, 3226 0, 3227 0, 3228 0, 3229 /*119*/0, 3230 0, 3231 0, 3232 0, 3233 MatGetMultiProcBlock_MPIAIJ, 3234 /*124*/MatFindNonzeroRows_MPIAIJ, 3235 MatGetColumnNorms_MPIAIJ, 3236 MatInvertBlockDiagonal_MPIAIJ, 3237 0, 3238 MatGetSubMatricesParallel_MPIAIJ, 3239 /*129*/0, 3240 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3241 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3242 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3243 0, 3244 /*134*/0, 3245 0, 3246 0, 3247 0, 3248 0, 3249 /*139*/0, 3250 0 3251 }; 3252 3253 /* ----------------------------------------------------------------------------------------*/ 3254 3255 #undef __FUNCT__ 3256 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3257 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3258 { 3259 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3260 PetscErrorCode ierr; 3261 3262 PetscFunctionBegin; 3263 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3264 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 3268 #undef __FUNCT__ 3269 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3270 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3271 { 3272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3273 PetscErrorCode ierr; 3274 3275 PetscFunctionBegin; 3276 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3277 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3278 PetscFunctionReturn(0); 3279 } 3280 3281 #undef __FUNCT__ 3282 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3283 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3284 { 3285 Mat_MPIAIJ *b; 3286 PetscErrorCode ierr; 3287 3288 PetscFunctionBegin; 3289 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3290 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3291 b = (Mat_MPIAIJ*)B->data; 3292 3293 if (!B->preallocated) { 3294 /* Explicitly create 2 MATSEQAIJ matrices. 
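       The first (b->A) holds the "diagonal" block of the parallel matrix and is sized by the
       local row and column counts; the second (b->B) holds the "off-diagonal" block and is
       created here with the full global number of columns, since its compressed map of ghost
       columns is only built later, during assembly.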
*/ 3295 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3296 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3297 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3298 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3299 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3300 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3301 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3302 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3303 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3304 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3305 } 3306 3307 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3308 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3309 B->preallocated = PETSC_TRUE; 3310 PetscFunctionReturn(0); 3311 } 3312 3313 #undef __FUNCT__ 3314 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3315 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3316 { 3317 Mat mat; 3318 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3319 PetscErrorCode ierr; 3320 3321 PetscFunctionBegin; 3322 *newmat = 0; 3323 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3324 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3325 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3326 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3327 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3328 a = (Mat_MPIAIJ*)mat->data; 3329 3330 mat->factortype = matin->factortype; 3331 mat->rmap->bs = matin->rmap->bs; 3332 mat->cmap->bs = matin->cmap->bs; 3333 mat->assembled = PETSC_TRUE; 3334 mat->insertmode = NOT_SET_VALUES; 3335 mat->preallocated = PETSC_TRUE; 3336 3337 a->size = oldmat->size; 3338 a->rank = oldmat->rank; 3339 a->donotstash = oldmat->donotstash; 3340 a->roworiented = oldmat->roworiented; 3341 a->rowindices = 0; 3342 a->rowvalues = 0; 3343 a->getrowactive = PETSC_FALSE; 3344 3345 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3346 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3347 3348 if (oldmat->colmap) { 3349 #if defined(PETSC_USE_CTABLE) 3350 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3351 #else 3352 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3353 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3354 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3355 #endif 3356 } else a->colmap = 0; 3357 if (oldmat->garray) { 3358 PetscInt len; 3359 len = oldmat->B->cmap->n; 3360 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3361 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3362 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3363 } else a->garray = 0; 3364 3365 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3366 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3367 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3368 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3369 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3370 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3371 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3372 ierr = 
PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3373 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3374 *newmat = mat; 3375 PetscFunctionReturn(0); 3376 } 3377 3378 3379 3380 #undef __FUNCT__ 3381 #define __FUNCT__ "MatLoad_MPIAIJ" 3382 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3383 { 3384 PetscScalar *vals,*svals; 3385 MPI_Comm comm; 3386 PetscErrorCode ierr; 3387 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3388 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3389 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3390 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3391 PetscInt cend,cstart,n,*rowners,sizesset=1; 3392 int fd; 3393 PetscInt bs = 1; 3394 3395 PetscFunctionBegin; 3396 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3397 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3398 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3399 if (!rank) { 3400 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3401 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3402 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3403 } 3404 3405 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3406 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3407 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3408 3409 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3410 3411 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3412 M = header[1]; N = header[2]; 3413 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3414 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3415 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3416 3417 /* If global sizes are set, check if they are consistent with that given in the file */ 3418 if (sizesset) { 3419 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3420 } 3421 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3422 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3423 3424 /* determine ownership of all (block) rows */ 3425 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3426 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3427 else m = newMat->rmap->n; /* Set by user */ 3428 3429 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3430 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3431 3432 /* First process needs enough room for process with most rows */ 3433 if (!rank) { 3434 mmax = rowners[1]; 3435 for (i=2; i<=size; i++) { 3436 mmax = PetscMax(mmax, rowners[i]); 3437 } 3438 } else mmax = -1; /* unused, but compilers complain */ 3439 3440 rowners[0] = 0; 3441 for (i=2; i<=size; i++) { 3442 rowners[i] += rowners[i-1]; 3443 } 3444 rstart = rowners[rank]; 3445 rend = rowners[rank+1]; 3446 3447 /* distribute row lengths to all processors */ 3448 ierr = 
PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3449 if (!rank) { 3450 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3451 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3452 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3453 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3454 for (j=0; j<m; j++) { 3455 procsnz[0] += ourlens[j]; 3456 } 3457 for (i=1; i<size; i++) { 3458 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3459 /* calculate the number of nonzeros on each processor */ 3460 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3461 procsnz[i] += rowlengths[j]; 3462 } 3463 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3464 } 3465 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3466 } else { 3467 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3468 } 3469 3470 if (!rank) { 3471 /* determine max buffer needed and allocate it */ 3472 maxnz = 0; 3473 for (i=0; i<size; i++) { 3474 maxnz = PetscMax(maxnz,procsnz[i]); 3475 } 3476 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3477 3478 /* read in my part of the matrix column indices */ 3479 nz = procsnz[0]; 3480 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3481 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3482 3483 /* read in every one elses and ship off */ 3484 for (i=1; i<size; i++) { 3485 nz = procsnz[i]; 3486 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3487 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3488 } 3489 ierr = PetscFree(cols);CHKERRQ(ierr); 3490 } else { 3491 /* determine buffer space needed for message */ 3492 nz = 0; 3493 for (i=0; i<m; i++) { 3494 nz += ourlens[i]; 3495 } 3496 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3497 3498 /* receive message of column indices*/ 3499 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3500 } 3501 3502 /* determine column ownership if matrix is not square */ 3503 if (N != M) { 3504 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3505 else n = newMat->cmap->n; 3506 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3507 cstart = cend - n; 3508 } else { 3509 cstart = rstart; 3510 cend = rend; 3511 n = cend - cstart; 3512 } 3513 3514 /* loop over local rows, determining number of off diagonal entries */ 3515 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3516 jj = 0; 3517 for (i=0; i<m; i++) { 3518 for (j=0; j<ourlens[i]; j++) { 3519 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3520 jj++; 3521 } 3522 } 3523 3524 for (i=0; i<m; i++) { 3525 ourlens[i] -= offlens[i]; 3526 } 3527 if (!sizesset) { 3528 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3529 } 3530 3531 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3532 3533 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3534 3535 for (i=0; i<m; i++) { 3536 ourlens[i] += offlens[i]; 3537 } 3538 3539 if (!rank) { 3540 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3541 3542 /* read in my part of the matrix numerical values */ 3543 nz = procsnz[0]; 3544 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3545 3546 /* insert into matrix */ 3547 jj = rstart; 3548 smycols = mycols; 3549 svals = vals; 3550 for (i=0; i<m; i++) { 3551 ierr = 
MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3552 smycols += ourlens[i]; 3553 svals += ourlens[i]; 3554 jj++; 3555 } 3556 3557 /* read in other processors and ship out */ 3558 for (i=1; i<size; i++) { 3559 nz = procsnz[i]; 3560 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3561 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3562 } 3563 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3564 } else { 3565 /* receive numeric values */ 3566 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3567 3568 /* receive message of values*/ 3569 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3570 3571 /* insert into matrix */ 3572 jj = rstart; 3573 smycols = mycols; 3574 svals = vals; 3575 for (i=0; i<m; i++) { 3576 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3577 smycols += ourlens[i]; 3578 svals += ourlens[i]; 3579 jj++; 3580 } 3581 } 3582 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3583 ierr = PetscFree(vals);CHKERRQ(ierr); 3584 ierr = PetscFree(mycols);CHKERRQ(ierr); 3585 ierr = PetscFree(rowners);CHKERRQ(ierr); 3586 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3587 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3588 PetscFunctionReturn(0); 3589 } 3590 3591 #undef __FUNCT__ 3592 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3593 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3594 { 3595 PetscErrorCode ierr; 3596 IS iscol_local; 3597 PetscInt csize; 3598 3599 PetscFunctionBegin; 3600 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3601 if (call == MAT_REUSE_MATRIX) { 3602 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3603 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3604 } else { 3605 PetscInt cbs; 3606 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3607 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3608 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3609 } 3610 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3611 if (call == MAT_INITIAL_MATRIX) { 3612 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3613 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3614 } 3615 PetscFunctionReturn(0); 3616 } 3617 3618 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3619 #undef __FUNCT__ 3620 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3621 /* 3622 Not great since it makes two copies of the submatrix, first an SeqAIJ 3623 in local and then by concatenating the local matrices the end result. 3624 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3625 3626 Note: This requires a sequential iscol with all indices. 
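       (The caller MatGetSubMatrix_MPIAIJ() above builds such an index set with ISAllGather()
       before calling this routine.)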
3627 */ 3628 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3629 { 3630 PetscErrorCode ierr; 3631 PetscMPIInt rank,size; 3632 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3633 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3634 PetscBool allcolumns, colflag; 3635 Mat M,Mreuse; 3636 MatScalar *vwork,*aa; 3637 MPI_Comm comm; 3638 Mat_SeqAIJ *aij; 3639 3640 PetscFunctionBegin; 3641 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3642 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3643 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3644 3645 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3646 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3647 if (colflag && ncol == mat->cmap->N) { 3648 allcolumns = PETSC_TRUE; 3649 } else { 3650 allcolumns = PETSC_FALSE; 3651 } 3652 if (call == MAT_REUSE_MATRIX) { 3653 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3654 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3655 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3656 } else { 3657 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3658 } 3659 3660 /* 3661 m - number of local rows 3662 n - number of columns (same on all processors) 3663 rstart - first row in new global matrix generated 3664 */ 3665 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3666 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3667 if (call == MAT_INITIAL_MATRIX) { 3668 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3669 ii = aij->i; 3670 jj = aij->j; 3671 3672 /* 3673 Determine the number of non-zeros in the diagonal and off-diagonal 3674 portions of the matrix in order to do correct preallocation 3675 */ 3676 3677 /* first get start and end of "diagonal" columns */ 3678 if (csize == PETSC_DECIDE) { 3679 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3680 if (mglobal == n) { /* square matrix */ 3681 nlocal = m; 3682 } else { 3683 nlocal = n/size + ((n % size) > rank); 3684 } 3685 } else { 3686 nlocal = csize; 3687 } 3688 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3689 rstart = rend - nlocal; 3690 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3691 3692 /* next, compute all the lengths */ 3693 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3694 olens = dlens + m; 3695 for (i=0; i<m; i++) { 3696 jend = ii[i+1] - ii[i]; 3697 olen = 0; 3698 dlen = 0; 3699 for (j=0; j<jend; j++) { 3700 if (*jj < rstart || *jj >= rend) olen++; 3701 else dlen++; 3702 jj++; 3703 } 3704 olens[i] = olen; 3705 dlens[i] = dlen; 3706 } 3707 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3708 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3709 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3710 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3711 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3712 ierr = PetscFree(dlens);CHKERRQ(ierr); 3713 } else { 3714 PetscInt ml,nl; 3715 3716 M = *newmat; 3717 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3718 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3719 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3720 /* 
3721 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3722 rather than the slower MatSetValues(). 3723 */ 3724 M->was_assembled = PETSC_TRUE; 3725 M->assembled = PETSC_FALSE; 3726 } 3727 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3728 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3729 ii = aij->i; 3730 jj = aij->j; 3731 aa = aij->a; 3732 for (i=0; i<m; i++) { 3733 row = rstart + i; 3734 nz = ii[i+1] - ii[i]; 3735 cwork = jj; jj += nz; 3736 vwork = aa; aa += nz; 3737 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3738 } 3739 3740 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3741 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3742 *newmat = M; 3743 3744 /* save submatrix used in processor for next request */ 3745 if (call == MAT_INITIAL_MATRIX) { 3746 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3747 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3748 } 3749 PetscFunctionReturn(0); 3750 } 3751 3752 #undef __FUNCT__ 3753 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3754 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3755 { 3756 PetscInt m,cstart, cend,j,nnz,i,d; 3757 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3758 const PetscInt *JJ; 3759 PetscScalar *values; 3760 PetscErrorCode ierr; 3761 3762 PetscFunctionBegin; 3763 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3764 3765 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3766 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3767 m = B->rmap->n; 3768 cstart = B->cmap->rstart; 3769 cend = B->cmap->rend; 3770 rstart = B->rmap->rstart; 3771 3772 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3773 3774 #if defined(PETSC_USE_DEBUG) 3775 for (i=0; i<m; i++) { 3776 nnz = Ii[i+1]- Ii[i]; 3777 JJ = J + Ii[i]; 3778 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3779 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3780 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3781 } 3782 #endif 3783 3784 for (i=0; i<m; i++) { 3785 nnz = Ii[i+1]- Ii[i]; 3786 JJ = J + Ii[i]; 3787 nnz_max = PetscMax(nnz_max,nnz); 3788 d = 0; 3789 for (j=0; j<nnz; j++) { 3790 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3791 } 3792 d_nnz[i] = d; 3793 o_nnz[i] = nnz - d; 3794 } 3795 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3796 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3797 3798 if (v) values = (PetscScalar*)v; 3799 else { 3800 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3801 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3802 } 3803 3804 for (i=0; i<m; i++) { 3805 ii = i + rstart; 3806 nnz = Ii[i+1]- Ii[i]; 3807 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3808 } 3809 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3810 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3811 3812 if (!v) { 3813 ierr = PetscFree(values);CHKERRQ(ierr); 3814 } 3815 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3816 PetscFunctionReturn(0); 3817 } 3818 3819 #undef __FUNCT__ 3820 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3821 /*@ 3822 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3823 (the default parallel PETSc format). 3824 3825 Collective on MPI_Comm 3826 3827 Input Parameters: 3828 + B - the matrix 3829 . i - the indices into j for the start of each local row (starts with zero) 3830 . j - the column indices for each local row (starts with zero) 3831 - v - optional values in the matrix 3832 3833 Level: developer 3834 3835 Notes: 3836 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3837 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3838 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3839 3840 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3841 3842 The format which is used for the sparse matrix input, is equivalent to a 3843 row-major ordering.. i.e for the following matrix, the input data expected is 3844 as shown: 3845 3846 1 0 0 3847 2 0 3 P0 3848 ------- 3849 4 5 6 P1 3850 3851 Process0 [P0]: rows_owned=[0,1] 3852 i = {0,1,3} [size = nrow+1 = 2+1] 3853 j = {0,0,2} [size = nz = 6] 3854 v = {1,2,3} [size = nz = 6] 3855 3856 Process1 [P1]: rows_owned=[2] 3857 i = {0,3} [size = nrow+1 = 1+1] 3858 j = {0,1,2} [size = nz = 6] 3859 v = {4,5,6} [size = nz = 6] 3860 3861 .keywords: matrix, aij, compressed row, sparse, parallel 3862 3863 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3864 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3865 @*/ 3866 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3867 { 3868 PetscErrorCode ierr; 3869 3870 PetscFunctionBegin; 3871 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3872 PetscFunctionReturn(0); 3873 } 3874 3875 #undef __FUNCT__ 3876 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3877 /*@C 3878 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3879 (the default parallel PETSc format). For good matrix assembly performance 3880 the user should preallocate the matrix storage by setting the parameters 3881 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3882 performance can be increased by more than a factor of 50. 3883 3884 Collective on MPI_Comm 3885 3886 Input Parameters: 3887 + A - the matrix 3888 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3889 (same value is used for all local rows) 3890 . d_nnz - array containing the number of nonzeros in the various rows of the 3891 DIAGONAL portion of the local submatrix (possibly different for each row) 3892 or NULL, if d_nz is used to specify the nonzero structure. 3893 The size of this array is equal to the number of local rows, i.e 'm'. 
For matrices that will be factored, you must leave room for (and set) 3895 the diagonal entry even if it is zero. 3896 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3897 submatrix (same value is used for all local rows). 3898 - o_nnz - array containing the number of nonzeros in the various rows of the 3899 OFF-DIAGONAL portion of the local submatrix (possibly different for 3900 each row) or NULL, if o_nz is used to specify the nonzero 3901 structure. The size of this array is equal to the number 3902 of local rows, i.e 'm'. 3903 3904 If the *_nnz parameter is given then the *_nz parameter is ignored. 3905 3906 The AIJ format (also called the Yale sparse matrix format or 3907 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3908 storage. The stored row and column indices begin with zero. 3909 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3910 3911 The parallel matrix is partitioned such that the first m0 rows belong to 3912 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3913 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 3914 3915 The DIAGONAL portion of the local submatrix of a processor can be defined 3916 as the submatrix which is obtained by extracting the part corresponding to 3917 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3918 first row that belongs to the processor, r2 is the last row belonging to 3919 this processor, and c1-c2 is the range of indices of the local part of a 3920 vector suitable for applying the matrix to. This is an mxn matrix. In the 3921 common case of a square matrix, the row and column ranges are the same and 3922 the DIAGONAL part is also square. The remaining portion of the local 3923 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3924 3925 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 3926 3927 You can call MatGetInfo() to get information on how effective the preallocation was; 3928 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3929 You can also run with the option -info and look for messages with the string 3930 malloc in them to see if additional memory allocation was needed. 3931 3932 Example usage: 3933 3934 Consider the following 8x8 matrix with 34 non-zero values, that is 3935 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 3936 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3937 as follows: 3938 3939 .vb 3940 1 2 0 | 0 3 0 | 0 4 3941 Proc0 0 5 6 | 7 0 0 | 8 0 3942 9 0 10 | 11 0 0 | 12 0 3943 ------------------------------------- 3944 13 0 14 | 15 16 17 | 0 0 3945 Proc1 0 18 0 | 19 20 21 | 0 0 3946 0 0 0 | 22 23 0 | 24 0 3947 ------------------------------------- 3948 Proc2 25 26 27 | 0 0 28 | 29 0 3949 30 0 0 | 31 32 33 | 0 34 3950 .ve 3951 3952 This can be represented as a collection of submatrices as: 3953 3954 .vb 3955 A B C 3956 D E F 3957 G H I 3958 .ve 3959 3960 where the submatrices A,B,C are owned by proc0, D,E,F are 3961 owned by proc1, and G,H,I are owned by proc2. 3962 3963 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3964 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3965 The 'M','N' parameters are 8,8, and have the same values on all procs. 3966 3967 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3968 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices 3969 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3970 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3971 part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ 3972 matrix and [DF] as another SeqAIJ matrix. 3973 3974 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3975 allocated for every row of the local diagonal submatrix, and o_nz 3976 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3977 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3978 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3979 In this case, the values of d_nz,o_nz are: 3980 .vb 3981 proc0 : d_nz = 2, o_nz = 2 3982 proc1 : d_nz = 3, o_nz = 2 3983 proc2 : d_nz = 1, o_nz = 4 3984 .ve 3985 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3986 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3987 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3988 34 values. 3989 3990 When the d_nnz, o_nnz parameters are specified, the storage is specified 3991 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3992 In the above case the values for d_nnz,o_nnz are: 3993 .vb 3994 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3995 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3996 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3997 .ve 3998 Here the space allocated is the sum of all the above values, i.e. 34, and 3999 hence the preallocation is perfect. 4000 4001 Level: intermediate 4002 4003 .keywords: matrix, aij, compressed row, sparse, parallel 4004 4005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4006 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4007 @*/ 4008 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4009 { 4010 PetscErrorCode ierr; 4011 4012 PetscFunctionBegin; 4013 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4014 PetscValidType(B,1); 4015 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4016 PetscFunctionReturn(0); 4017 } 4018 4019 #undef __FUNCT__ 4020 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4021 /*@ 4022 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 4023 in standard CSR format. 4024 4025 Collective on MPI_Comm 4026 4027 Input Parameters: 4028 + comm - MPI communicator 4029 . m - number of local rows (cannot be PETSC_DECIDE) 4030 . n - This value should be the same as the local size used in creating the 4031 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4032 calculated if N is given) For square matrices n is almost always m. 4033 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4034 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4035 . i - row indices 4036 . j - column indices 4037 - a - matrix values 4038 4039 Output Parameter: 4040 . mat - the matrix 4041 4042 Level: intermediate 4043 4044 Notes: 4045 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4046 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4047 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
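   For the two-process layout shown below, a call on process 0 might look like the
   following sketch (the variable names are illustrative only; process 1 would pass its
   own local arrays i = {0,3}, j = {0,1,2}, a = {4,5,6}):
.vb
      PetscInt    i[] = {0,1,3};        /* row pointers for the 2 local rows */
      PetscInt    j[] = {0,0,2};        /* global column indices             */
      PetscScalar a[] = {1.0,2.0,3.0};  /* values                            */
      Mat         A;
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve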
4048 4049 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4050 4051 The format which is used for the sparse matrix input, is equivalent to a 4052 row-major ordering.. i.e for the following matrix, the input data expected is 4053 as shown: 4054 4055 1 0 0 4056 2 0 3 P0 4057 ------- 4058 4 5 6 P1 4059 4060 Process0 [P0]: rows_owned=[0,1] 4061 i = {0,1,3} [size = nrow+1 = 2+1] 4062 j = {0,0,2} [size = nz = 6] 4063 v = {1,2,3} [size = nz = 6] 4064 4065 Process1 [P1]: rows_owned=[2] 4066 i = {0,3} [size = nrow+1 = 1+1] 4067 j = {0,1,2} [size = nz = 6] 4068 v = {4,5,6} [size = nz = 6] 4069 4070 .keywords: matrix, aij, compressed row, sparse, parallel 4071 4072 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4073 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4074 @*/ 4075 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4076 { 4077 PetscErrorCode ierr; 4078 4079 PetscFunctionBegin; 4080 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4081 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4082 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4083 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4084 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4085 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4086 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4087 PetscFunctionReturn(0); 4088 } 4089 4090 #undef __FUNCT__ 4091 #define __FUNCT__ "MatCreateAIJ" 4092 /*@C 4093 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4094 (the default parallel PETSc format). For good matrix assembly performance 4095 the user should preallocate the matrix storage by setting the parameters 4096 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4097 performance can be increased by more than a factor of 50. 4098 4099 Collective on MPI_Comm 4100 4101 Input Parameters: 4102 + comm - MPI communicator 4103 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4104 This value should be the same as the local size used in creating the 4105 y vector for the matrix-vector product y = Ax. 4106 . n - This value should be the same as the local size used in creating the 4107 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4108 calculated if N is given) For square matrices n is almost always m. 4109 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4110 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4111 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4112 (same value is used for all local rows) 4113 . d_nnz - array containing the number of nonzeros in the various rows of the 4114 DIAGONAL portion of the local submatrix (possibly different for each row) 4115 or NULL, if d_nz is used to specify the nonzero structure. 4116 The size of this array is equal to the number of local rows, i.e 'm'. 4117 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4118 submatrix (same value is used for all local rows). 
4119 - o_nnz - array containing the number of nonzeros in the various rows of the 4120 OFF-DIAGONAL portion of the local submatrix (possibly different for 4121 each row) or NULL, if o_nz is used to specify the nonzero 4122 structure. The size of this array is equal to the number 4123 of local rows, i.e 'm'. 4124 4125 Output Parameter: 4126 . A - the matrix 4127 4128 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4129 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4130 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4131 4132 Notes: 4133 If the *_nnz parameter is given then the *_nz parameter is ignored. 4134 4135 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4136 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4137 storage requirements for this matrix. 4138 4139 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4140 processor then it must be used on all processors that share the object for 4141 that argument. 4142 4143 The user MUST specify either the local or global matrix dimensions 4144 (possibly both). 4145 4146 The parallel matrix is partitioned across processors such that the 4147 first m0 rows belong to process 0, the next m1 rows belong to 4148 process 1, the next m2 rows belong to process 2 etc., where 4149 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 4150 values corresponding to an [m x N] submatrix. 4151 4152 The columns are logically partitioned with the n0 columns belonging 4153 to the 0th partition, the next n1 columns belonging to the next 4154 partition etc., where n0,n1,n2... are the input parameter 'n'. 4155 4156 The DIAGONAL portion of the local submatrix on any given processor 4157 is the submatrix corresponding to the rows and columns m,n 4158 corresponding to the given processor, i.e. the diagonal matrix on 4159 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4160 etc. The remaining portion of the local submatrix [m x (N-n)] 4161 constitutes the OFF-DIAGONAL portion. The example below better 4162 illustrates this concept. 4163 4164 For a square global matrix we define each processor's diagonal portion 4165 to be its local rows and the corresponding columns (a square submatrix); 4166 each processor's off-diagonal portion encompasses the remainder of the 4167 local matrix (a rectangular submatrix). 4168 4169 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 4170 4171 When calling this routine with a single process communicator, a matrix of 4172 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4173 type of communicator, use the construction mechanism: 4174 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4175 4176 By default, this format uses inodes (identical nodes) when possible. 4177 We search for consecutive rows with the same nonzero structure, thereby 4178 reusing matrix information to achieve increased efficiency. 4179 4180 Options Database Keys: 4181 + -mat_no_inode - Do not use inodes 4182 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4183 - -mat_aij_oneindex - Internally use indexing starting at 1 4184 rather than 0. Note that when calling MatSetValues(), 4185 the user still MUST index entries starting at 0! 4186 4187 4188 Example usage: 4189 4190 Consider the following 8x8 matrix with 34 non-zero values, that is 4191 assembled across 3 processors.
Let us assume that proc0 owns 3 rows, 4192 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4193 as follows: 4194 4195 .vb 4196 1 2 0 | 0 3 0 | 0 4 4197 Proc0 0 5 6 | 7 0 0 | 8 0 4198 9 0 10 | 11 0 0 | 12 0 4199 ------------------------------------- 4200 13 0 14 | 15 16 17 | 0 0 4201 Proc1 0 18 0 | 19 20 21 | 0 0 4202 0 0 0 | 22 23 0 | 24 0 4203 ------------------------------------- 4204 Proc2 25 26 27 | 0 0 28 | 29 0 4205 30 0 0 | 31 32 33 | 0 34 4206 .ve 4207 4208 This can be represented as a collection of submatrices as: 4209 4210 .vb 4211 A B C 4212 D E F 4213 G H I 4214 .ve 4215 4216 where the submatrices A,B,C are owned by proc0, D,E,F are 4217 owned by proc1, and G,H,I are owned by proc2. 4218 4219 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4220 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4221 The 'M','N' parameters are 8,8, and have the same values on all procs. 4222 4223 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4224 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4225 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4226 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4227 part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ 4228 matrix and [DF] as another SeqAIJ matrix. 4229 4230 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4231 allocated for every row of the local diagonal submatrix, and o_nz 4232 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4233 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4234 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4235 In this case, the values of d_nz,o_nz are: 4236 .vb 4237 proc0 : d_nz = 2, o_nz = 2 4238 proc1 : d_nz = 3, o_nz = 2 4239 proc2 : d_nz = 1, o_nz = 4 4240 .ve 4241 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4242 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4243 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4244 34 values. 4245 4246 When the d_nnz, o_nnz parameters are specified, the storage is specified 4247 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4248 In the above case the values for d_nnz,o_nnz are: 4249 .vb 4250 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4251 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4252 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4253 .ve 4254 Here the space allocated is the sum of all the above values, i.e. 34, and 4255 hence the preallocation is perfect.
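   On proc0 of this example the corresponding call might look like the following
   sketch (illustrative only; proc1 and proc2 would pass their own m, n, d_nnz and o_nnz):
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      Mat      A;
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve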
4256 4257 Level: intermediate 4258 4259 .keywords: matrix, aij, compressed row, sparse, parallel 4260 4261 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4262 MPIAIJ, MatCreateMPIAIJWithArrays() 4263 @*/ 4264 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4265 { 4266 PetscErrorCode ierr; 4267 PetscMPIInt size; 4268 4269 PetscFunctionBegin; 4270 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4271 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4273 if (size > 1) { 4274 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4275 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4276 } else { 4277 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4278 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4279 } 4280 PetscFunctionReturn(0); 4281 } 4282 4283 #undef __FUNCT__ 4284 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4285 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4286 { 4287 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4288 4289 PetscFunctionBegin; 4290 *Ad = a->A; 4291 *Ao = a->B; 4292 *colmap = a->garray; 4293 PetscFunctionReturn(0); 4294 } 4295 4296 #undef __FUNCT__ 4297 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4298 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4299 { 4300 PetscErrorCode ierr; 4301 PetscInt i; 4302 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4303 4304 PetscFunctionBegin; 4305 if (coloring->ctype == IS_COLORING_GLOBAL) { 4306 ISColoringValue *allcolors,*colors; 4307 ISColoring ocoloring; 4308 4309 /* set coloring for diagonal portion */ 4310 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4311 4312 /* set coloring for off-diagonal portion */ 4313 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4314 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4315 for (i=0; i<a->B->cmap->n; i++) { 4316 colors[i] = allcolors[a->garray[i]]; 4317 } 4318 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4319 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4320 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4321 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4322 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4323 ISColoringValue *colors; 4324 PetscInt *larray; 4325 ISColoring ocoloring; 4326 4327 /* set coloring for diagonal portion */ 4328 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4329 for (i=0; i<a->A->cmap->n; i++) { 4330 larray[i] = i + A->cmap->rstart; 4331 } 4332 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4333 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4334 for (i=0; i<a->A->cmap->n; i++) { 4335 colors[i] = coloring->colors[larray[i]]; 4336 } 4337 ierr = PetscFree(larray);CHKERRQ(ierr); 4338 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4339 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4340 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4341 4342 /* set coloring for off-diagonal portion */ 4343 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4344 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4345 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4346 for (i=0; i<a->B->cmap->n; i++) { 4347 colors[i] = coloring->colors[larray[i]]; 4348 } 4349 ierr = PetscFree(larray);CHKERRQ(ierr); 4350 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4351 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4352 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4353 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4354 PetscFunctionReturn(0); 4355 } 4356 4357 #undef __FUNCT__ 4358 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4359 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4360 { 4361 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4362 PetscErrorCode ierr; 4363 4364 PetscFunctionBegin; 4365 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4366 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4367 PetscFunctionReturn(0); 4368 } 4369 4370 #undef __FUNCT__ 4371 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4372 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4373 { 4374 PetscErrorCode ierr; 4375 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4376 PetscInt *indx; 4377 4378 PetscFunctionBegin; 4379 /* This routine will ONLY return MPIAIJ type matrix */ 4380 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4381 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4382 if (n == PETSC_DECIDE) { 4383 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4384 } 4385 /* Check sum(n) = N */ 4386 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4387 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4388 4389 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4390 rstart -= m; 4391 4392 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4393 for (i=0; i<m; i++) { 4394 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4395 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4396 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4397 } 4398 4399 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4400 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4401 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4402 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4403 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4404 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4405 PetscFunctionReturn(0); 4406 } 4407 4408 #undef __FUNCT__ 4409 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4410 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4411 { 4412 PetscErrorCode ierr; 4413 PetscInt m,N,i,rstart,nnz,Ii; 4414 PetscInt *indx; 4415 PetscScalar *values; 4416 4417 PetscFunctionBegin; 4418 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4419 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4420 for (i=0; i<m; i++) { 4421 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4422 Ii = i + rstart; 4423 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4424 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4425   }
4426   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4427   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
4430
4431 #undef __FUNCT__
4432 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4433 /*@
4434    MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating the sequential
4435      matrix from each processor
4436
4437    Collective on MPI_Comm
4438
4439    Input Parameters:
4440 +  comm - the communicator the parallel matrix will live on
4441 .  inmat - the input sequential matrix on each process
4442 .  n - number of local columns (or PETSC_DECIDE)
4443 -  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4444
4445    Output Parameter:
4446 .  outmat - the parallel matrix generated
4447
4448    Level: advanced
4449
4450    Notes: The number of columns of the matrix on EACH processor MUST be the same.
4451
4452 @*/
4453 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4454 {
4455   PetscErrorCode ierr;
4456
4457   PetscFunctionBegin;
4458   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4459   if (scall == MAT_INITIAL_MATRIX) {
4460     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4461   }
4462   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4463   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4464   PetscFunctionReturn(0);
4465 }
4466
4467 #undef __FUNCT__
4468 #define __FUNCT__ "MatFileSplit"
4469 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4470 {
4471   PetscErrorCode    ierr;
4472   PetscMPIInt       rank;
4473   PetscInt          m,N,i,rstart,nnz;
4474   size_t            len;
4475   const PetscInt    *indx;
4476   PetscViewer       out;
4477   char              *name;
4478   Mat               B;
4479   const PetscScalar *values;
4480
4481   PetscFunctionBegin;
4482   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4483   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4484   /* Should this be the type of the diagonal block of A?
*/ 4485 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4486 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4487 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4488 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4489 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4490 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4491 for (i=0; i<m; i++) { 4492 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4493 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4494 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4495 } 4496 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4497 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4498 4499 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4500 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4501 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4502 sprintf(name,"%s.%d",outfile,rank); 4503 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4504 ierr = PetscFree(name);CHKERRQ(ierr); 4505 ierr = MatView(B,out);CHKERRQ(ierr); 4506 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4507 ierr = MatDestroy(&B);CHKERRQ(ierr); 4508 PetscFunctionReturn(0); 4509 } 4510 4511 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4514 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4515 { 4516 PetscErrorCode ierr; 4517 Mat_Merge_SeqsToMPI *merge; 4518 PetscContainer container; 4519 4520 PetscFunctionBegin; 4521 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4522 if (container) { 4523 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4524 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4525 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4526 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4534 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4535 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4536 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4537 ierr = PetscFree(merge);CHKERRQ(ierr); 4538 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4539 } 4540 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4541 PetscFunctionReturn(0); 4542 } 4543 4544 #include <../src/mat/utils/freespace.h> 4545 #include <petscbt.h> 4546 4547 #undef __FUNCT__ 4548 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4549 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4550 { 4551 PetscErrorCode ierr; 4552 MPI_Comm comm; 4553 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4554 PetscMPIInt size,rank,taga,*len_s; 4555 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4556 PetscInt proc,m; 4557 PetscInt **buf_ri,**buf_rj; 4558 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4559 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4560 MPI_Request *s_waits,*r_waits; 4561 MPI_Status *status; 4562 MatScalar *aa=a->a; 4563 MatScalar **abuf_r,*ba_i; 4564 Mat_Merge_SeqsToMPI *merge; 4565 PetscContainer container; 4566 4567 PetscFunctionBegin; 4568 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4569 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4570 4571 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4572 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4573 4574 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4575 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4576 4577 bi = merge->bi; 4578 bj = merge->bj; 4579 buf_ri = merge->buf_ri; 4580 buf_rj = merge->buf_rj; 4581 4582 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4583 owners = merge->rowmap->range; 4584 len_s = merge->len_s; 4585 4586 /* send and recv matrix values */ 4587 /*-----------------------------*/ 4588 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4589 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4590 4591 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4592 for (proc=0,k=0; proc<size; proc++) { 4593 if (!len_s[proc]) continue; 4594 i = owners[proc]; 4595 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4596 k++; 4597 } 4598 4599 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4600 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4601 ierr = PetscFree(status);CHKERRQ(ierr); 4602 4603 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4604 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4605 4606 /* insert mat values of mpimat */ 4607 /*----------------------------*/ 4608 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4609 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4610 4611 for (k=0; k<merge->nrecv; k++) { 4612 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4613 nrows = *(buf_ri_k[k]); 4614 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4615 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4616 } 4617 4618 /* set values of ba */ 4619 m = merge->rowmap->n; 4620 for (i=0; i<m; i++) { 4621 arow = owners[rank] + i; 4622 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4623 bnzi = bi[i+1] - bi[i]; 4624 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4625 4626 /* add local non-zero vals of this proc's seqmat into ba */ 4627 anzi = ai[arow+1] - ai[arow]; 4628 aj = a->j + ai[arow]; 4629 aa = a->a + ai[arow]; 4630 nextaj = 0; 4631 for (j=0; nextaj<anzi; j++) { 4632 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4633 ba_i[j] += aa[nextaj++]; 4634 } 4635 } 4636 4637 /* add received vals into ba */ 4638 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4639 /* i-th row */ 4640 if (i == *nextrow[k]) { 4641 anzi = *(nextai[k]+1) - *nextai[k]; 4642 aj = buf_rj[k] + *(nextai[k]); 4643 aa = abuf_r[k] + *(nextai[k]); 4644 nextaj = 0; 4645 for (j=0; nextaj<anzi; j++) { 4646 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4647 ba_i[j] += aa[nextaj++]; 4648 } 4649 } 4650 nextrow[k]++; nextai[k]++; 4651 } 4652 } 4653 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4654 } 4655 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4656 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4657 4658 ierr = 
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4659 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4660 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4661 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4662 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4663 PetscFunctionReturn(0); 4664 } 4665 4666 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4667 4668 #undef __FUNCT__ 4669 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4670 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4671 { 4672 PetscErrorCode ierr; 4673 Mat B_mpi; 4674 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4675 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4676 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4677 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4678 PetscInt len,proc,*dnz,*onz,bs,cbs; 4679 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4680 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4681 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4682 MPI_Status *status; 4683 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4684 PetscBT lnkbt; 4685 Mat_Merge_SeqsToMPI *merge; 4686 PetscContainer container; 4687 4688 PetscFunctionBegin; 4689 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4690 4691 /* make sure it is a PETSc comm */ 4692 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4693 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4694 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4695 4696 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4697 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4698 4699 /* determine row ownership */ 4700 /*---------------------------------------------------------*/ 4701 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4702 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4703 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4704 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4705 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4706 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4707 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4708 4709 m = merge->rowmap->n; 4710 owners = merge->rowmap->range; 4711 4712 /* determine the number of messages to send, their lengths */ 4713 /*---------------------------------------------------------*/ 4714 len_s = merge->len_s; 4715 4716 len = 0; /* length of buf_si[] */ 4717 merge->nsend = 0; 4718 for (proc=0; proc<size; proc++) { 4719 len_si[proc] = 0; 4720 if (proc == rank) { 4721 len_s[proc] = 0; 4722 } else { 4723 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4724 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4725 } 4726 if (len_s[proc]) { 4727 merge->nsend++; 4728 nrows = 0; 4729 for (i=owners[proc]; i<owners[proc+1]; i++) { 4730 if (ai[i+1] > ai[i]) nrows++; 4731 } 4732 len_si[proc] = 2*(nrows+1); 4733 len += len_si[proc]; 4734 } 4735 } 4736 4737 /* determine the number and length of messages to receive for ij-structure */ 4738 /*-------------------------------------------------------------------------*/ 4739 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4740 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4741 4742 /* post the Irecv of j-structure */ 4743 
/*-------------------------------*/ 4744 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4745 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4746 4747 /* post the Isend of j-structure */ 4748 /*--------------------------------*/ 4749 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4750 4751 for (proc=0, k=0; proc<size; proc++) { 4752 if (!len_s[proc]) continue; 4753 i = owners[proc]; 4754 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4755 k++; 4756 } 4757 4758 /* receives and sends of j-structure are complete */ 4759 /*------------------------------------------------*/ 4760 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4761 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4762 4763 /* send and recv i-structure */ 4764 /*---------------------------*/ 4765 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4766 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4767 4768 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4769 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4770 for (proc=0,k=0; proc<size; proc++) { 4771 if (!len_s[proc]) continue; 4772 /* form outgoing message for i-structure: 4773 buf_si[0]: nrows to be sent 4774 [1:nrows]: row index (global) 4775 [nrows+1:2*nrows+1]: i-structure index 4776 */ 4777 /*-------------------------------------------*/ 4778 nrows = len_si[proc]/2 - 1; 4779 buf_si_i = buf_si + nrows+1; 4780 buf_si[0] = nrows; 4781 buf_si_i[0] = 0; 4782 nrows = 0; 4783 for (i=owners[proc]; i<owners[proc+1]; i++) { 4784 anzi = ai[i+1] - ai[i]; 4785 if (anzi) { 4786 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4787 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4788 nrows++; 4789 } 4790 } 4791 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4792 k++; 4793 buf_si += len_si[proc]; 4794 } 4795 4796 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4797 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4798 4799 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4800 for (i=0; i<merge->nrecv; i++) { 4801 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4802 } 4803 4804 ierr = PetscFree(len_si);CHKERRQ(ierr); 4805 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4806 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4807 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4808 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4809 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4810 ierr = PetscFree(status);CHKERRQ(ierr); 4811 4812 /* compute a local seq matrix in each processor */ 4813 /*----------------------------------------------*/ 4814 /* allocate bi array and free space for accumulating nonzero column info */ 4815 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4816 bi[0] = 0; 4817 4818 /* create and initialize a linked list */ 4819 nlnk = N+1; 4820 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4821 4822 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4823 len = ai[owners[rank+1]] - ai[owners[rank]]; 4824 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4825 4826 
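  /* current_space walks the free-space list: the merged (sorted) column indices of each local row are
     staged in current_space->array below and are later packed contiguously into bj by
     PetscFreeSpaceContiguous() */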
current_space = free_space; 4827 4828 /* determine symbolic info for each local row */ 4829 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4830 4831 for (k=0; k<merge->nrecv; k++) { 4832 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4833 nrows = *buf_ri_k[k]; 4834 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4835 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4836 } 4837 4838 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4839 len = 0; 4840 for (i=0; i<m; i++) { 4841 bnzi = 0; 4842 /* add local non-zero cols of this proc's seqmat into lnk */ 4843 arow = owners[rank] + i; 4844 anzi = ai[arow+1] - ai[arow]; 4845 aj = a->j + ai[arow]; 4846 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4847 bnzi += nlnk; 4848 /* add received col data into lnk */ 4849 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4850 if (i == *nextrow[k]) { /* i-th row */ 4851 anzi = *(nextai[k]+1) - *nextai[k]; 4852 aj = buf_rj[k] + *nextai[k]; 4853 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4854 bnzi += nlnk; 4855 nextrow[k]++; nextai[k]++; 4856 } 4857 } 4858 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4859 4860 /* if free space is not available, make more free space */ 4861 if (current_space->local_remaining<bnzi) { 4862 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4863 nspacedouble++; 4864 } 4865 /* copy data into free space, then initialize lnk */ 4866 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4867 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4868 4869 current_space->array += bnzi; 4870 current_space->local_used += bnzi; 4871 current_space->local_remaining -= bnzi; 4872 4873 bi[i+1] = bi[i] + bnzi; 4874 } 4875 4876 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4877 4878 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4879 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4880 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4881 4882 /* create symbolic parallel matrix B_mpi */ 4883 /*---------------------------------------*/ 4884 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4885 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4886 if (n==PETSC_DECIDE) { 4887 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4888 } else { 4889 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4890 } 4891 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4892 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4893 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4894 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4895 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4896 4897 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4898 B_mpi->assembled = PETSC_FALSE; 4899 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4900 merge->bi = bi; 4901 merge->bj = bj; 4902 merge->buf_ri = buf_ri; 4903 merge->buf_rj = buf_rj; 4904 merge->coi = NULL; 4905 merge->coj = NULL; 4906 merge->owners_co = NULL; 4907 4908 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4909 4910 /* attach the supporting struct to B_mpi for reuse */ 4911 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4912   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4913   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4914   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4915   *mpimat = B_mpi;
4916
4917   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4918   PetscFunctionReturn(0);
4919 }
4920
4921 #undef __FUNCT__
4922 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4923 /*@C
4924    MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4925      matrix from each processor
4926
4927    Collective on MPI_Comm
4928
4929    Input Parameters:
4930 +  comm - the communicator the parallel matrix will live on
4931 .  seqmat - the input sequential matrix on each process
4932 .  m - number of local rows (or PETSC_DECIDE)
4933 .  n - number of local columns (or PETSC_DECIDE)
4934 -  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4935
4936    Output Parameter:
4937 .  mpimat - the parallel matrix generated
4938
4939    Level: advanced
4940
4941    Notes:
4942      The dimensions of the sequential matrix in each processor MUST be the same.
4943      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4944      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4945 @*/
4946 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4947 {
4948   PetscErrorCode ierr;
4949   PetscMPIInt    size;
4950
4951   PetscFunctionBegin;
4952   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4953   if (size == 1) {
4954     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4955     if (scall == MAT_INITIAL_MATRIX) {
4956       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4957     } else {
4958       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4959     }
4960     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4961     PetscFunctionReturn(0);
4962   }
4963   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4964   if (scall == MAT_INITIAL_MATRIX) {
4965     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4966   }
4967   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4968   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4969   PetscFunctionReturn(0);
4970 }
4971
4972 #undef __FUNCT__
4973 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4974 /*@
4975    MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4976      mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4977      with MatGetSize().
4978
4979    Not Collective
4980
4981    Input Parameters:
4982 +  A - the matrix
4983 -  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4984
4985    Output Parameter:
4986 .
A_loc - the local sequential matrix generated 4987 4988 Level: developer 4989 4990 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4991 4992 @*/ 4993 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4994 { 4995 PetscErrorCode ierr; 4996 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4997 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4998 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4999 MatScalar *aa=a->a,*ba=b->a,*cam; 5000 PetscScalar *ca; 5001 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5002 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5003 PetscBool match; 5004 5005 PetscFunctionBegin; 5006 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5007 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5008 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5009 if (scall == MAT_INITIAL_MATRIX) { 5010 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5011 ci[0] = 0; 5012 for (i=0; i<am; i++) { 5013 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5014 } 5015 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5016 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5017 k = 0; 5018 for (i=0; i<am; i++) { 5019 ncols_o = bi[i+1] - bi[i]; 5020 ncols_d = ai[i+1] - ai[i]; 5021 /* off-diagonal portion of A */ 5022 for (jo=0; jo<ncols_o; jo++) { 5023 col = cmap[*bj]; 5024 if (col >= cstart) break; 5025 cj[k] = col; bj++; 5026 ca[k++] = *ba++; 5027 } 5028 /* diagonal portion of A */ 5029 for (j=0; j<ncols_d; j++) { 5030 cj[k] = cstart + *aj++; 5031 ca[k++] = *aa++; 5032 } 5033 /* off-diagonal portion of A */ 5034 for (j=jo; j<ncols_o; j++) { 5035 cj[k] = cmap[*bj++]; 5036 ca[k++] = *ba++; 5037 } 5038 } 5039 /* put together the new matrix */ 5040 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5041 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5042 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5043 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5044 mat->free_a = PETSC_TRUE; 5045 mat->free_ij = PETSC_TRUE; 5046 mat->nonew = 0; 5047 } else if (scall == MAT_REUSE_MATRIX) { 5048 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5049 ci = mat->i; cj = mat->j; cam = mat->a; 5050 for (i=0; i<am; i++) { 5051 /* off-diagonal portion of A */ 5052 ncols_o = bi[i+1] - bi[i]; 5053 for (jo=0; jo<ncols_o; jo++) { 5054 col = cmap[*bj]; 5055 if (col >= cstart) break; 5056 *cam++ = *ba++; bj++; 5057 } 5058 /* diagonal portion of A */ 5059 ncols_d = ai[i+1] - ai[i]; 5060 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5061 /* off-diagonal portion of A */ 5062 for (j=jo; j<ncols_o; j++) { 5063 *cam++ = *ba++; bj++; 5064 } 5065 } 5066 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5067 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5068 PetscFunctionReturn(0); 5069 } 5070 5071 #undef __FUNCT__ 5072 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5073 /*@C 5074 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5075 5076 Not Collective 5077 5078 Input Parameters: 5079 + A - the matrix 5080 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5081 - row, col - index sets of rows and columns to extract (or NULL) 5082 5083 Output Parameter: 5084 . 
A_loc - the local sequential matrix generated 5085 5086 Level: developer 5087 5088 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5089 5090 @*/ 5091 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5092 { 5093 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5094 PetscErrorCode ierr; 5095 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5096 IS isrowa,iscola; 5097 Mat *aloc; 5098 PetscBool match; 5099 5100 PetscFunctionBegin; 5101 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5102 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5103 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5104 if (!row) { 5105 start = A->rmap->rstart; end = A->rmap->rend; 5106 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5107 } else { 5108 isrowa = *row; 5109 } 5110 if (!col) { 5111 start = A->cmap->rstart; 5112 cmap = a->garray; 5113 nzA = a->A->cmap->n; 5114 nzB = a->B->cmap->n; 5115 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5116 ncols = 0; 5117 for (i=0; i<nzB; i++) { 5118 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5119 else break; 5120 } 5121 imark = i; 5122 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5123 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5124 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5125 } else { 5126 iscola = *col; 5127 } 5128 if (scall != MAT_INITIAL_MATRIX) { 5129 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5130 aloc[0] = *A_loc; 5131 } 5132 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5133 *A_loc = aloc[0]; 5134 ierr = PetscFree(aloc);CHKERRQ(ierr); 5135 if (!row) { 5136 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5137 } 5138 if (!col) { 5139 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5140 } 5141 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5142 PetscFunctionReturn(0); 5143 } 5144 5145 #undef __FUNCT__ 5146 #define __FUNCT__ "MatGetBrowsOfAcols" 5147 /*@C 5148 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5149 5150 Collective on Mat 5151 5152 Input Parameters: 5153 + A,B - the matrices in mpiaij format 5154 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5155 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5156 5157 Output Parameter: 5158 + rowb, colb - index sets of rows and columns of B to extract 5159 - B_seq - the sequential matrix generated 5160 5161 Level: developer 5162 5163 @*/ 5164 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5165 { 5166 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5167 PetscErrorCode ierr; 5168 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5169 IS isrowb,iscolb; 5170 Mat *bseq=NULL; 5171 5172 PetscFunctionBegin; 5173 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5174 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5175 } 5176 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5177 5178 if (scall == MAT_INITIAL_MATRIX) { 5179 start = A->cmap->rstart; 5180 cmap = a->garray; 5181 nzA = a->A->cmap->n; 5182 nzB = a->B->cmap->n; 5183 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5184 ncols = 0; 5185 for (i=0; i<nzB; i++) { /* row < local row index */ 5186 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5187 else break; 5188 } 5189 imark = i; 5190 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5191 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5192 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5193 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5194 } else { 5195 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5196 isrowb = *rowb; iscolb = *colb; 5197 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5198 bseq[0] = *B_seq; 5199 } 5200 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5201 *B_seq = bseq[0]; 5202 ierr = PetscFree(bseq);CHKERRQ(ierr); 5203 if (!rowb) { 5204 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5205 } else { 5206 *rowb = isrowb; 5207 } 5208 if (!colb) { 5209 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5210 } else { 5211 *colb = iscolb; 5212 } 5213 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5214 PetscFunctionReturn(0); 5215 } 5216 5217 #undef __FUNCT__ 5218 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5219 /* 5220 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5221 of the OFF-DIAGONAL portion of local A 5222 5223 Collective on Mat 5224 5225 Input Parameters: 5226 + A,B - the matrices in mpiaij format 5227 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5228 5229 Output Parameter: 5230 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5231 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5232 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5233 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5234 5235 Level: developer 5236 5237 */ 5238 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5239 { 5240 VecScatter_MPI_General *gen_to,*gen_from; 5241 PetscErrorCode ierr; 5242 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5243 Mat_SeqAIJ *b_oth; 5244 VecScatter ctx =a->Mvctx; 5245 MPI_Comm comm; 5246 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5247 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5248 PetscScalar *rvalues,*svalues; 5249 MatScalar *b_otha,*bufa,*bufA; 5250 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5251 MPI_Request *rwaits = NULL,*swaits = NULL; 5252 MPI_Status *sstatus,rstatus; 5253 PetscMPIInt jj; 5254 PetscInt *cols,sbs,rbs; 5255 PetscScalar *vals; 5256 5257 PetscFunctionBegin; 5258 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5259 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5260 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5261 } 5262 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5263 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5264 5265 gen_to = (VecScatter_MPI_General*)ctx->todata; 5266 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5267 rvalues = gen_from->values; /* holds the length of receiving row */ 5268 svalues = gen_to->values; /* holds the length of sending row */ 5269 nrecvs = gen_from->n; 5270 nsends = gen_to->n; 5271 5272 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5273 srow = gen_to->indices; /* local row index to be sent */ 5274 sstarts = gen_to->starts; 5275 sprocs = gen_to->procs; 5276 sstatus = gen_to->sstatus; 5277 sbs = gen_to->bs; 5278 rstarts = gen_from->starts; 5279 rprocs = gen_from->procs; 5280 rbs = gen_from->bs; 5281 5282 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5283 if (scall == MAT_INITIAL_MATRIX) { 5284 /* i-array */ 5285 /*---------*/ 5286 /* post receives */ 5287 for (i=0; i<nrecvs; i++) { 5288 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5289 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5290 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5291 } 5292 5293 /* pack the outgoing message */ 5294 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5295 5296 sstartsj[0] = 0; 5297 rstartsj[0] = 0; 5298 len = 0; /* total length of j or a array to be sent */ 5299 k = 0; 5300 for (i=0; i<nsends; i++) { 5301 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5302 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5303 for (j=0; j<nrows; j++) { 5304 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5305 for (l=0; l<sbs; l++) { 5306 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5307 5308 rowlen[j*sbs+l] = ncols; 5309 5310 len += ncols; 5311 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5312 } 5313 k++; 5314 } 5315 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5316 5317 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5318 } 5319 /* recvs and sends of i-array are completed */ 5320 i = nrecvs; 5321 while (i--) { 5322 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5323 } 5324 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5325 5326 /* allocate buffers for sending j and a arrays */ 5327 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5328 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5329 5330 /* create i-array of B_oth */ 5331 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5332 5333 b_othi[0] = 0; 5334 len = 0; /* total length of j or a array to be received */ 5335 k = 0; 5336 for (i=0; i<nrecvs; i++) { 5337 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5338 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5339 for (j=0; j<nrows; j++) { 5340 b_othi[k+1] = b_othi[k] + rowlen[j]; 5341 len += rowlen[j]; k++; 5342 } 5343 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5344 } 5345 5346 /* allocate space for j and a arrrays of B_oth */ 5347 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5348 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5349 5350 /* j-array */ 5351 /*---------*/ 5352 /* post receives of j-array */ 5353 for (i=0; i<nrecvs; i++) { 5354 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5355 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5356 } 5357 5358 /* pack the outgoing message j-array */ 5359 k = 0; 5360 for (i=0; i<nsends; i++) { 5361 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5362 bufJ = bufj+sstartsj[i]; 5363 for (j=0; j<nrows; j++) { 5364 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5365 for (ll=0; ll<sbs; ll++) { 5366 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5367 for (l=0; l<ncols; l++) { 5368 *bufJ++ = cols[l]; 5369 } 5370 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5371 } 5372 } 5373 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5374 } 5375 5376 /* recvs and sends of j-array are completed */ 5377 i = nrecvs; 5378 while (i--) { 5379 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5380 } 5381 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5382 } else if (scall == MAT_REUSE_MATRIX) { 5383 sstartsj = *startsj_s; 5384 rstartsj = *startsj_r; 5385 bufa = *bufa_ptr; 5386 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5387 b_otha = b_oth->a; 5388 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5389 5390 /* a-array */ 5391 /*---------*/ 5392 /* post receives of a-array */ 5393 for (i=0; i<nrecvs; i++) { 5394 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5395 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5396 } 5397 5398 /* pack the outgoing message a-array */ 5399 k = 0; 5400 for (i=0; i<nsends; i++) { 5401 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5402 bufA = bufa+sstartsj[i]; 5403 for (j=0; j<nrows; j++) { 5404 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5405 for (ll=0; ll<sbs; ll++) { 5406 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5407 for (l=0; l<ncols; l++) { 5408 *bufA++ = vals[l]; 5409 } 5410 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5411 } 5412 } 5413 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5414 } 5415 /* recvs and sends of a-array are completed */ 5416 i = nrecvs; 5417 while (i--) { 5418 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5419 } 5420 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5421 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5422 5423 if (scall == MAT_INITIAL_MATRIX) { 5424 /* put together the new matrix */ 5425 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5426 5427 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5428 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5429 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5430 b_oth->free_a = PETSC_TRUE; 5431 b_oth->free_ij = PETSC_TRUE; 5432 b_oth->nonew = 0; 5433 5434 ierr = PetscFree(bufj);CHKERRQ(ierr); 5435 if (!startsj_s || !bufa_ptr) { 5436 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5437 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5438 } else { 5439 *startsj_s = sstartsj; 5440 *startsj_r = rstartsj; 5441 *bufa_ptr = bufa; 5442 } 5443 } 5444 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5445 PetscFunctionReturn(0); 5446 } 5447 5448 #undef __FUNCT__ 5449 #define __FUNCT__ "MatGetCommunicationStructs" 5450 /*@C 5451 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5452 5453 Not Collective 5454 5455 Input Parameters: 5456 . A - The matrix in mpiaij format 5457 5458 Output Parameter: 5459 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5460 . 
colmap - A map from global column index to local index into lvec 5461 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5462 5463 Level: developer 5464 5465 @*/ 5466 #if defined(PETSC_USE_CTABLE) 5467 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5468 #else 5469 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5470 #endif 5471 { 5472 Mat_MPIAIJ *a; 5473 5474 PetscFunctionBegin; 5475 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5476 PetscValidPointer(lvec, 2); 5477 PetscValidPointer(colmap, 3); 5478 PetscValidPointer(multScatter, 4); 5479 a = (Mat_MPIAIJ*) A->data; 5480 if (lvec) *lvec = a->lvec; 5481 if (colmap) *colmap = a->colmap; 5482 if (multScatter) *multScatter = a->Mvctx; 5483 PetscFunctionReturn(0); 5484 } 5485 5486 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5487 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5488 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5489 5490 #undef __FUNCT__ 5491 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5492 /* 5493 Computes (B'*A')' since computing B*A directly is untenable 5494 5495 n p p 5496 ( ) ( ) ( ) 5497 m ( A ) * n ( B ) = m ( C ) 5498 ( ) ( ) ( ) 5499 5500 */ 5501 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5502 { 5503 PetscErrorCode ierr; 5504 Mat At,Bt,Ct; 5505 5506 PetscFunctionBegin; 5507 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5508 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5509 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5510 ierr = MatDestroy(&At);CHKERRQ(ierr); 5511 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5512 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5513 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5514 PetscFunctionReturn(0); 5515 } 5516 5517 #undef __FUNCT__ 5518 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5519 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5520 { 5521 PetscErrorCode ierr; 5522 PetscInt m=A->rmap->n,n=B->cmap->n; 5523 Mat Cmat; 5524 5525 PetscFunctionBegin; 5526 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5527 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5528 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5529 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5530 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5531 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5532 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5533 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5534 5535 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5536 5537 *C = Cmat; 5538 PetscFunctionReturn(0); 5539 } 5540 5541 /* ----------------------------------------------------------------*/ 5542 #undef __FUNCT__ 5543 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5544 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5545 { 5546 PetscErrorCode ierr; 5547 5548 PetscFunctionBegin; 5549 if (scall == MAT_INITIAL_MATRIX) { 5550 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5551 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5552 ierr = 
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5553 } 5554 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5555 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5556 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5557 PetscFunctionReturn(0); 5558 } 5559 5560 #if defined(PETSC_HAVE_MUMPS) 5561 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5562 #endif 5563 #if defined(PETSC_HAVE_PASTIX) 5564 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5565 #endif 5566 #if defined(PETSC_HAVE_SUPERLU_DIST) 5567 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5568 #endif 5569 #if defined(PETSC_HAVE_CLIQUE) 5570 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5571 #endif 5572 5573 /*MC 5574 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5575 5576 Options Database Keys: 5577 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5578 5579 Level: beginner 5580 5581 .seealso: MatCreateAIJ() 5582 M*/ 5583 5584 #undef __FUNCT__ 5585 #define __FUNCT__ "MatCreate_MPIAIJ" 5586 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5587 { 5588 Mat_MPIAIJ *b; 5589 PetscErrorCode ierr; 5590 PetscMPIInt size; 5591 5592 PetscFunctionBegin; 5593 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5594 5595 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5596 B->data = (void*)b; 5597 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5598 B->assembled = PETSC_FALSE; 5599 B->insertmode = NOT_SET_VALUES; 5600 b->size = size; 5601 5602 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5603 5604 /* build cache for off array entries formed */ 5605 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5606 5607 b->donotstash = PETSC_FALSE; 5608 b->colmap = 0; 5609 b->garray = 0; 5610 b->roworiented = PETSC_TRUE; 5611 5612 /* stuff used for matrix vector multiply */ 5613 b->lvec = NULL; 5614 b->Mvctx = NULL; 5615 5616 /* stuff for MatGetRow() */ 5617 b->rowindices = 0; 5618 b->rowvalues = 0; 5619 b->getrowactive = PETSC_FALSE; 5620 5621 /* flexible pointer used in CUSP/CUSPARSE classes */ 5622 b->spptr = NULL; 5623 5624 #if defined(PETSC_HAVE_MUMPS) 5625 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5626 #endif 5627 #if defined(PETSC_HAVE_PASTIX) 5628 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5629 #endif 5630 #if defined(PETSC_HAVE_SUPERLU_DIST) 5631 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5632 #endif 5633 #if defined(PETSC_HAVE_CLIQUE) 5634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5635 #endif 5636 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5637 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5638 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5640 
ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5646 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5649 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5650 PetscFunctionReturn(0); 5651 } 5652 5653 #undef __FUNCT__ 5654 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5655 /*@ 5656 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5657 and "off-diagonal" part of the matrix in CSR format. 5658 5659 Collective on MPI_Comm 5660 5661 Input Parameters: 5662 + comm - MPI communicator 5663 . m - number of local rows (Cannot be PETSC_DECIDE) 5664 . n - This value should be the same as the local size used in creating the 5665 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5666 calculated if N is given) For square matrices n is almost always m. 5667 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5668 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5669 . i - row indices for "diagonal" portion of matrix 5670 . j - column indices 5671 . a - matrix values 5672 . oi - row indices for "off-diagonal" portion of matrix 5673 . oj - column indices 5674 - oa - matrix values 5675 5676 Output Parameter: 5677 . mat - the matrix 5678 5679 Level: advanced 5680 5681 Notes: 5682 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5683 must free the arrays once the matrix has been destroyed and not before. 5684 5685 The i and j indices are 0 based 5686 5687 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5688 5689 This sets local rows and cannot be used to set off-processor values. 5690 5691 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5692 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5693 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5694 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5695 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5696 communication if it is known that only local entries will be set. 
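   An illustrative sketch (the numbers below are made up for exposition and are not taken from any
   PETSc example): for a 4 x 4 matrix distributed over two processes, where rank 0 owns rows 0-1 and
   columns 0-1 and its two rows are [1 2 4 0] and [0 3 0 0], rank 0 could pass the following arrays.
   Note that, matching the MatCreateSeqAIJWithArrays() calls in the implementation, j holds column
   indices local to the "diagonal" block while oj holds global column indices.
.vb
     i  = {0,2,3}   j  = {0,1,1}   a  = {1.0,2.0,3.0}    (diagonal block, rank 0)
     oi = {0,1,1}   oj = {2}       oa = {4.0}            (off-diagonal block, rank 0)
.ve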
5697 5698 .keywords: matrix, aij, compressed row, sparse, parallel 5699 5700 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5701 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5702 @*/ 5703 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5704 { 5705 PetscErrorCode ierr; 5706 Mat_MPIAIJ *maij; 5707 5708 PetscFunctionBegin; 5709 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5710 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5711 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5712 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5713 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5714 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5715 maij = (Mat_MPIAIJ*) (*mat)->data; 5716 5717 (*mat)->preallocated = PETSC_TRUE; 5718 5719 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5720 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5721 5722 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5723 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5724 5725 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5726 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5727 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5728 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5729 5730 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5731 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5732 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5733 PetscFunctionReturn(0); 5734 } 5735 5736 /* 5737 Special version for direct calls from Fortran 5738 */ 5739 #include <petsc-private/fortranimpl.h> 5740 5741 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5742 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5743 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5744 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5745 #endif 5746 5747 /* Change these macros so can be used in void function */ 5748 #undef CHKERRQ 5749 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5750 #undef SETERRQ2 5751 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5752 #undef SETERRQ3 5753 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5754 #undef SETERRQ 5755 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5756 5757 #undef __FUNCT__ 5758 #define __FUNCT__ "matsetvaluesmpiaij_" 5759 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5760 { 5761 Mat mat = *mmat; 5762 PetscInt m = *mm, n = *mn; 5763 InsertMode addv = *maddv; 5764 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5765 PetscScalar value; 5766 PetscErrorCode ierr; 5767 5768 MatCheckPreallocated(mat,1); 5769 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5770 5771 #if defined(PETSC_USE_DEBUG) 5772 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5773 #endif 5774 { 5775 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5776 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5777 PetscBool roworiented = aij->roworiented; 5778 5779 /* Some Variables required in the macro */ 5780 Mat A = aij->A; 5781 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5782 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5783 MatScalar *aa = a->a; 5784 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5785 Mat B = aij->B; 5786 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5787 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5788 MatScalar *ba = b->a; 5789 5790 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5791 PetscInt nonew = a->nonew; 5792 MatScalar *ap1,*ap2; 5793 5794 PetscFunctionBegin; 5795 for (i=0; i<m; i++) { 5796 if (im[i] < 0) continue; 5797 #if defined(PETSC_USE_DEBUG) 5798 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5799 #endif 5800 if (im[i] >= rstart && im[i] < rend) { 5801 row = im[i] - rstart; 5802 lastcol1 = -1; 5803 rp1 = aj + ai[row]; 5804 ap1 = aa + ai[row]; 5805 rmax1 = aimax[row]; 5806 nrow1 = ailen[row]; 5807 low1 = 0; 5808 high1 = nrow1; 5809 lastcol2 = -1; 5810 rp2 = bj + bi[row]; 5811 ap2 = ba + bi[row]; 5812 rmax2 = bimax[row]; 5813 nrow2 = bilen[row]; 5814 low2 = 0; 5815 high2 = nrow2; 5816 5817 for (j=0; j<n; j++) { 5818 if (roworiented) value = v[i*n+j]; 5819 else value = v[i+j*m]; 5820 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5821 if (in[j] >= cstart && in[j] < cend) { 5822 col = in[j] - cstart; 5823 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5824 } else if (in[j] < 0) continue; 5825 #if defined(PETSC_USE_DEBUG) 5826 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5827 #endif 5828 else { 5829 if (mat->was_assembled) { 5830 if (!aij->colmap) { 5831 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5832 } 5833 #if defined(PETSC_USE_CTABLE) 5834 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5835 col--; 5836 #else 5837 col = aij->colmap[in[j]] - 1; 5838 #endif 5839 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5840 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5841 col = in[j]; 5842 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5843 B = aij->B; 5844 b = (Mat_SeqAIJ*)B->data; 5845 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5846 rp2 = bj + bi[row]; 5847 ap2 = ba + bi[row]; 5848 rmax2 = bimax[row]; 5849 nrow2 = bilen[row]; 5850 low2 = 0; 5851 high2 = nrow2; 5852 bm = aij->B->rmap->n; 5853 ba = b->a; 5854 } 5855 } else col = in[j]; 5856 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5857 } 5858 } 5859 } else if (!aij->donotstash) { 5860 if (roworiented) { 5861 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5862 } else { 5863 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5864 } 5865 } 5866 } 5867 } 5868 PetscFunctionReturnVoid(); 5869 } 5870 5871
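/*
   Illustrative sketch (added for exposition; not compiled and not tied to any particular example):
   the usual creation path for the routines in this file is MatCreateAIJ() followed by MatSetValues()
   and assembly. The global size and the preallocation counts below are arbitrary placeholders.

     Mat            A;
     PetscInt       rstart,rend,row;
     PetscScalar    one = 1.0;
     PetscErrorCode ierr;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {                                  // set the locally owned diagonal entries
       ierr = MatSetValues(A,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/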