#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
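
/*
   Usage sketch illustrating the preallocation advice in the MATAIJ manual page above: calling
   both MatSeqAIJSetPreallocation() and MatMPIAIJSetPreallocation() lets the same code run on a
   single process or on many, since the routine that does not match the actual matrix type is
   ignored. This is a minimal sketch only; the global size (100) and the per-row nonzero
   estimates (5 diagonal, 2 off-diagonal) are hypothetical placeholders, not values from this file.

       Mat            A;
       PetscErrorCode ierr;

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/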

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 CHKMEMQ; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = 
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 CHKMEMQ; 349 PetscFunctionReturn(0); 350 } 351 352 /* 353 Local utility routine that creates a mapping from the global column 354 
number to the local number in the off-diagonal part of the local 355 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 356 a slightly higher hash table cost; without it it is not scalable (each processor 357 has an order N integer array but is fast to acess. 358 */ 359 #undef __FUNCT__ 360 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 361 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 362 { 363 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 364 PetscErrorCode ierr; 365 PetscInt n = aij->B->cmap->n,i; 366 367 PetscFunctionBegin; 368 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 369 #if defined(PETSC_USE_CTABLE) 370 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 371 for (i=0; i<n; i++) { 372 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 373 } 374 #else 375 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 376 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 378 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 379 #endif 380 PetscFunctionReturn(0); 381 } 382 383 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 384 { \ 385 if (col <= lastcol1) low1 = 0; \ 386 else high1 = nrow1; \ 387 lastcol1 = col;\ 388 while (high1-low1 > 5) { \ 389 t = (low1+high1)/2; \ 390 if (rp1[t] > col) high1 = t; \ 391 else low1 = t; \ 392 } \ 393 for (_i=low1; _i<high1; _i++) { \ 394 if (rp1[_i] > col) break; \ 395 if (rp1[_i] == col) { \ 396 if (addv == ADD_VALUES) ap1[_i] += value; \ 397 else ap1[_i] = value; \ 398 goto a_noinsert; \ 399 } \ 400 } \ 401 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 402 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 403 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 404 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 405 N = nrow1++ - 1; a->nz++; high1++; \ 406 /* shift up all the later entries in this row */ \ 407 for (ii=N; ii>=_i; ii--) { \ 408 rp1[ii+1] = rp1[ii]; \ 409 ap1[ii+1] = ap1[ii]; \ 410 } \ 411 rp1[_i] = col; \ 412 ap1[_i] = value; \ 413 a_noinsert: ; \ 414 ailen[row] = nrow1; \ 415 } 416 417 418 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 419 { \ 420 if (col <= lastcol2) low2 = 0; \ 421 else high2 = nrow2; \ 422 lastcol2 = col; \ 423 while (high2-low2 > 5) { \ 424 t = (low2+high2)/2; \ 425 if (rp2[t] > col) high2 = t; \ 426 else low2 = t; \ 427 } \ 428 for (_i=low2; _i<high2; _i++) { \ 429 if (rp2[_i] > col) break; \ 430 if (rp2[_i] == col) { \ 431 if (addv == ADD_VALUES) ap2[_i] += value; \ 432 else ap2[_i] = value; \ 433 goto b_noinsert; \ 434 } \ 435 } \ 436 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 438 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 439 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 440 N = nrow2++ - 1; b->nz++; high2++; \ 441 /* shift up all the later entries in this row */ \ 442 for (ii=N; ii>=_i; ii--) { \ 443 rp2[ii+1] = rp2[ii]; \ 444 ap2[ii+1] = ap2[ii]; \ 445 } \ 446 rp2[_i] = col; \ 447 ap2[_i] = value; 
\ 448 b_noinsert: ; \ 449 bilen[row] = nrow2; \ 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 454 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 455 { 456 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 457 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 458 PetscErrorCode ierr; 459 PetscInt l,*garray = mat->garray,diag; 460 461 PetscFunctionBegin; 462 /* code only works for square matrices A */ 463 464 /* find size of row to the left of the diagonal part */ 465 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 466 row = row - diag; 467 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 468 if (garray[b->j[b->i[row]+l]] > diag) break; 469 } 470 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 471 472 /* diagonal part */ 473 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 474 475 /* right of diagonal part */ 476 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 477 PetscFunctionReturn(0); 478 } 479 480 #undef __FUNCT__ 481 #define __FUNCT__ "MatSetValues_MPIAIJ" 482 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 483 { 484 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 485 PetscScalar value; 486 PetscErrorCode ierr; 487 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 488 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 489 PetscBool roworiented = aij->roworiented; 490 491 /* Some Variables required in the macro */ 492 Mat A = aij->A; 493 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 494 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 495 MatScalar *aa = a->a; 496 PetscBool ignorezeroentries = a->ignorezeroentries; 497 Mat B = aij->B; 498 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 499 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 500 MatScalar *ba = b->a; 501 502 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 503 PetscInt nonew; 504 MatScalar *ap1,*ap2; 505 506 PetscFunctionBegin; 507 if (v) PetscValidScalarPointer(v,6); 508 for (i=0; i<m; i++) { 509 if (im[i] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 512 #endif 513 if (im[i] >= rstart && im[i] < rend) { 514 row = im[i] - rstart; 515 lastcol1 = -1; 516 rp1 = aj + ai[row]; 517 ap1 = aa + ai[row]; 518 rmax1 = aimax[row]; 519 nrow1 = ailen[row]; 520 low1 = 0; 521 high1 = nrow1; 522 lastcol2 = -1; 523 rp2 = bj + bi[row]; 524 ap2 = ba + bi[row]; 525 rmax2 = bimax[row]; 526 nrow2 = bilen[row]; 527 low2 = 0; 528 high2 = nrow2; 529 530 for (j=0; j<n; j++) { 531 if (v) { 532 if (roworiented) value = v[i*n+j]; 533 else value = v[i+j*m]; 534 } else value = 0.0; 535 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 536 if (in[j] >= cstart && in[j] < cend) { 537 col = in[j] - cstart; 538 nonew = a->nonew; 539 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 540 } else if (in[j] < 0) continue; 541 #if defined(PETSC_USE_DEBUG) 542 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 543 #endif 544 else { 545 if (mat->was_assembled) { 546 if 
(!aij->colmap) { 547 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 548 } 549 #if defined(PETSC_USE_CTABLE) 550 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 551 col--; 552 #else 553 col = aij->colmap[in[j]] - 1; 554 #endif 555 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 556 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 557 col = in[j]; 558 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 559 B = aij->B; 560 b = (Mat_SeqAIJ*)B->data; 561 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 562 rp2 = bj + bi[row]; 563 ap2 = ba + bi[row]; 564 rmax2 = bimax[row]; 565 nrow2 = bilen[row]; 566 low2 = 0; 567 high2 = nrow2; 568 bm = aij->B->rmap->n; 569 ba = b->a; 570 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 571 } else col = in[j]; 572 nonew = b->nonew; 573 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 574 } 575 } 576 } else { 577 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 578 if (!aij->donotstash) { 579 mat->assembled = PETSC_FALSE; 580 if (roworiented) { 581 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 582 } else { 583 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 584 } 585 } 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatGetValues_MPIAIJ" 593 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 594 { 595 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 596 PetscErrorCode ierr; 597 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 598 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 599 600 PetscFunctionBegin; 601 for (i=0; i<m; i++) { 602 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 603 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 604 if (idxm[i] >= rstart && idxm[i] < rend) { 605 row = idxm[i] - rstart; 606 for (j=0; j<n; j++) { 607 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 608 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 609 if (idxn[j] >= cstart && idxn[j] < cend) { 610 col = idxn[j] - cstart; 611 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } else { 613 if (!aij->colmap) { 614 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 615 } 616 #if defined(PETSC_USE_CTABLE) 617 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 618 col--; 619 #else 620 col = aij->colmap[idxn[j]] - 1; 621 #endif 622 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 623 else { 624 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 625 } 626 } 627 } 628 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 629 } 630 PetscFunctionReturn(0); 631 } 632 633 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 634 635 #undef __FUNCT__ 636 #define __FUNCT__ 
"MatAssemblyBegin_MPIAIJ" 637 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 638 { 639 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 640 PetscErrorCode ierr; 641 PetscInt nstash,reallocs; 642 InsertMode addv; 643 644 PetscFunctionBegin; 645 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 646 647 /* make sure all processors are either in INSERTMODE or ADDMODE */ 648 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 649 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 650 mat->insertmode = addv; /* in case this processor had no cache */ 651 652 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 653 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 654 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 660 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 661 { 662 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 663 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 664 PetscErrorCode ierr; 665 PetscMPIInt n; 666 PetscInt i,j,rstart,ncols,flg; 667 PetscInt *row,*col; 668 PetscBool other_disassembled; 669 PetscScalar *val; 670 InsertMode addv = mat->insertmode; 671 672 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 673 674 PetscFunctionBegin; 675 if (!aij->donotstash && !mat->nooffprocentries) { 676 while (1) { 677 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 678 if (!flg) break; 679 680 for (i=0; i<n; ) { 681 /* Now identify the consecutive vals belonging to the same row */ 682 for (j=i,rstart=row[j]; j<n; j++) { 683 if (row[j] != rstart) break; 684 } 685 if (j < n) ncols = j-i; 686 else ncols = n-i; 687 /* Now assemble all these values with a single function call */ 688 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 689 690 i = j; 691 } 692 } 693 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 694 } 695 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 696 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 697 698 /* determine if any processor has disassembled, if so we must 699 also disassemble ourselfs, in order that we may reassemble. 
*/ 700 /* 701 if nonzero structure of submatrix B cannot change then we know that 702 no processor disassembled thus we can skip this stuff 703 */ 704 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 705 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 706 if (mat->was_assembled && !other_disassembled) { 707 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 708 } 709 } 710 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 711 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 712 } 713 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 715 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 716 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 717 718 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 719 720 aij->rowvalues = 0; 721 722 /* used by MatAXPY() */ 723 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 724 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 725 726 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 727 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 733 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 734 { 735 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 740 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatZeroRows_MPIAIJ" 746 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 747 { 748 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 751 PetscInt i,*owners = A->rmap->range; 752 PetscInt *nprocs,j,idx,nsends,row; 753 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 754 PetscInt *rvalues,count,base,slen,*source; 755 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 756 MPI_Comm comm; 757 MPI_Request *send_waits,*recv_waits; 758 MPI_Status recv_status,*send_status; 759 const PetscScalar *xx; 760 PetscScalar *bb; 761 #if defined(PETSC_DEBUG) 762 PetscBool found = PETSC_FALSE; 763 #endif 764 765 PetscFunctionBegin; 766 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 767 /* first count number of contributors to each processor */ 768 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 769 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 770 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 771 j = 0; 772 for (i=0; i<N; i++) { 773 if (lastidx > (idx = rows[i])) j = 0; 774 lastidx = idx; 775 for (; j<size; j++) { 776 if (idx >= owners[j] && idx < owners[j+1]) { 777 nprocs[2*j]++; 778 nprocs[2*j+1] = 1; 779 owner[i] = j; 780 #if defined(PETSC_DEBUG) 781 found = PETSC_TRUE; 782 #endif 783 break; 784 } 785 } 786 #if defined(PETSC_DEBUG) 787 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 788 found = PETSC_FALSE; 789 #endif 790 } 791 nsends = 0; 792 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 793 794 if (A->nooffproczerorows) { 795 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off 
process zero row"); 796 nrecvs = nsends; 797 nmax = N; 798 } else { 799 /* inform other processors of number of messages and max length*/ 800 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 801 } 802 803 /* post receives: */ 804 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 805 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 806 for (i=0; i<nrecvs; i++) { 807 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 808 } 809 810 /* do sends: 811 1) starts[i] gives the starting index in svalues for stuff going to 812 the ith processor 813 */ 814 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 815 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 816 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 817 818 starts[0] = 0; 819 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 820 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 821 822 starts[0] = 0; 823 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 824 count = 0; 825 for (i=0; i<size; i++) { 826 if (nprocs[2*i+1]) { 827 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 828 } 829 } 830 ierr = PetscFree(starts);CHKERRQ(ierr); 831 832 base = owners[rank]; 833 834 /* wait on receives */ 835 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 836 count = nrecvs; slen = 0; 837 while (count) { 838 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 839 /* unpack receives into our local space */ 840 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 841 842 source[imdex] = recv_status.MPI_SOURCE; 843 lens[imdex] = n; 844 slen += n; 845 count--; 846 } 847 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 848 849 /* move the data into the send scatter */ 850 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 851 count = 0; 852 for (i=0; i<nrecvs; i++) { 853 values = rvalues + i*nmax; 854 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 855 } 856 ierr = PetscFree(rvalues);CHKERRQ(ierr); 857 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 858 ierr = PetscFree(owner);CHKERRQ(ierr); 859 ierr = PetscFree(nprocs);CHKERRQ(ierr); 860 861 /* fix right hand side if needed */ 862 if (x && b) { 863 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 864 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 865 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 866 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 867 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 868 } 869 /* 870 Zero the required rows. If the "diagonal block" of the matrix 871 is square and the user wishes to set the diagonal we use separate 872 code so that MatSetValues() is not called for each diagonal allocating 873 new memory, thus calling lots of mallocs and slowing things down. 
874 875 */ 876 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 877 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 878 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 879 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 880 } else if (diag != 0.0) { 881 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 882 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 883 for (i = 0; i < slen; i++) { 884 row = lrows[i] + rstart; 885 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 886 } 887 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 889 } else { 890 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 891 } 892 ierr = PetscFree(lrows);CHKERRQ(ierr); 893 894 /* wait on sends */ 895 if (nsends) { 896 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 897 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 898 ierr = PetscFree(send_status);CHKERRQ(ierr); 899 } 900 ierr = PetscFree(send_waits);CHKERRQ(ierr); 901 ierr = PetscFree(svalues);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 907 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 908 { 909 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 910 PetscErrorCode ierr; 911 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 912 PetscInt i,*owners = A->rmap->range; 913 PetscInt *nprocs,j,idx,nsends; 914 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 915 PetscInt *rvalues,count,base,slen,*source; 916 PetscInt *lens,*lrows,*values,m; 917 MPI_Comm comm; 918 MPI_Request *send_waits,*recv_waits; 919 MPI_Status recv_status,*send_status; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 #if defined(PETSC_DEBUG) 927 PetscBool found = PETSC_FALSE; 928 #endif 929 930 PetscFunctionBegin; 931 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 932 /* first count number of contributors to each processor */ 933 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 934 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 935 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 936 j = 0; 937 for (i=0; i<N; i++) { 938 if (lastidx > (idx = rows[i])) j = 0; 939 lastidx = idx; 940 for (; j<size; j++) { 941 if (idx >= owners[j] && idx < owners[j+1]) { 942 nprocs[2*j]++; 943 nprocs[2*j+1] = 1; 944 owner[i] = j; 945 #if defined(PETSC_DEBUG) 946 found = PETSC_TRUE; 947 #endif 948 break; 949 } 950 } 951 #if defined(PETSC_DEBUG) 952 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 953 found = PETSC_FALSE; 954 #endif 955 } 956 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 957 958 /* inform other processors of number of messages and max length*/ 959 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 960 961 /* post receives: */ 962 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 963 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 964 for (i=0; i<nrecvs; i++) { 965 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 966 } 967 968 /* do sends: 969 1) starts[i] gives the starting index in svalues for stuff going to 970 the ith processor 971 */ 972 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 973 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 974 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 975 976 starts[0] = 0; 977 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 978 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 979 980 starts[0] = 0; 981 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 982 count = 0; 983 for (i=0; i<size; i++) { 984 if (nprocs[2*i+1]) { 985 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 986 } 987 } 988 ierr = PetscFree(starts);CHKERRQ(ierr); 989 990 base = owners[rank]; 991 992 /* wait on receives */ 993 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 994 count = nrecvs; slen = 0; 995 while (count) { 996 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 997 /* unpack receives into our local space */ 998 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 999 1000 source[imdex] = recv_status.MPI_SOURCE; 1001 lens[imdex] = n; 1002 slen += n; 1003 count--; 1004 } 1005 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1006 1007 /* move the data into the send scatter */ 1008 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1009 count = 0; 1010 for (i=0; i<nrecvs; i++) { 1011 values = rvalues + i*nmax; 1012 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1013 } 1014 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1015 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1016 ierr = PetscFree(owner);CHKERRQ(ierr); 1017 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1018 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1019 1020 /* zero diagonal part of matrix */ 1021 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1022 1023 /* handle off diagonal part of matrix */ 1024 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1025 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1026 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1027 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1028 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1029 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1032 if (x) { 1033 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1036 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1037 } 1038 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1039 1040 /* remove zeroed rows of off diagonal matrix */ 1041 ii = aij->i; 1042 for (i=0; i<slen; i++) { 1043 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1044 } 1045 1046 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1047 if (aij->compressedrow.use) { 1048 m = aij->compressedrow.nrows; 1049 ii = 
aij->compressedrow.i; 1050 ridx = aij->compressedrow.rindex; 1051 for (i=0; i<m; i++) { 1052 n = ii[i+1] - ii[i]; 1053 aj = aij->j + ii[i]; 1054 aa = aij->a + ii[i]; 1055 1056 for (j=0; j<n; j++) { 1057 if (PetscAbsScalar(mask[*aj])) { 1058 if (b) bb[*ridx] -= *aa*xx[*aj]; 1059 *aa = 0.0; 1060 } 1061 aa++; 1062 aj++; 1063 } 1064 ridx++; 1065 } 1066 } else { /* do not use compressed row format */ 1067 m = l->B->rmap->n; 1068 for (i=0; i<m; i++) { 1069 n = ii[i+1] - ii[i]; 1070 aj = aij->j + ii[i]; 1071 aa = aij->a + ii[i]; 1072 for (j=0; j<n; j++) { 1073 if (PetscAbsScalar(mask[*aj])) { 1074 if (b) bb[i] -= *aa*xx[*aj]; 1075 *aa = 0.0; 1076 } 1077 aa++; 1078 aj++; 1079 } 1080 } 1081 } 1082 if (x) { 1083 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1084 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1085 } 1086 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1087 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1088 ierr = PetscFree(lrows);CHKERRQ(ierr); 1089 1090 /* wait on sends */ 1091 if (nsends) { 1092 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1093 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1094 ierr = PetscFree(send_status);CHKERRQ(ierr); 1095 } 1096 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1097 ierr = PetscFree(svalues);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatMult_MPIAIJ" 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 1109 PetscFunctionBegin; 1110 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1111 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1112 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1114 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1122 { 1123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1133 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1134 { 1135 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1136 PetscErrorCode ierr; 1137 1138 PetscFunctionBegin; 1139 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1141 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1148 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1149 { 1150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1151 PetscErrorCode ierr; 1152 PetscBool merged; 1153 1154 PetscFunctionBegin; 1155 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1156 /* do nondiagonal part */ 1157 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1158 if (!merged) { 1159 /* send 
it on its way */ 1160 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1161 /* do local part */ 1162 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1163 /* receive remote parts: note this assumes the values are not actually */ 1164 /* added in yy until the next line, */ 1165 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1166 } else { 1167 /* do local part */ 1168 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1169 /* send it on its way */ 1170 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1171 /* values actually were received in the Begin() but we need to call this nop */ 1172 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1173 } 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1179 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1180 { 1181 MPI_Comm comm; 1182 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1183 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1184 IS Me,Notme; 1185 PetscErrorCode ierr; 1186 PetscInt M,N,first,last,*notme,i; 1187 PetscMPIInt size; 1188 1189 PetscFunctionBegin; 1190 /* Easy test: symmetric diagonal block */ 1191 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1192 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1193 if (!*f) PetscFunctionReturn(0); 1194 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1196 if (size == 1) PetscFunctionReturn(0); 1197 1198 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1199 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1200 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1201 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1202 for (i=0; i<first; i++) notme[i] = i; 1203 for (i=last; i<M; i++) notme[i-last+first] = i; 1204 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1205 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1206 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1207 Aoff = Aoffs[0]; 1208 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1209 Boff = Boffs[0]; 1210 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1212 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1214 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1215 ierr = PetscFree(notme);CHKERRQ(ierr); 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1221 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1222 { 1223 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1224 PetscErrorCode ierr; 1225 1226 PetscFunctionBegin; 1227 /* do nondiagonal part */ 1228 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1229 /* send it on its way */ 1230 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1231 /* do local part */ 1232 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1233 /* receive remote parts */ 1234 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 /* 1239 This only works correctly for 
square matrices where the subblock A->A is the 1240 diagonal block 1241 */ 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1244 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1245 { 1246 PetscErrorCode ierr; 1247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1248 1249 PetscFunctionBegin; 1250 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1251 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1252 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatScale_MPIAIJ" 1258 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1259 { 1260 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1261 PetscErrorCode ierr; 1262 1263 PetscFunctionBegin; 1264 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1265 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatDestroy_MPIAIJ" 1271 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1272 { 1273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 #if defined(PETSC_USE_LOG) 1278 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1279 #endif 1280 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1281 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1283 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1284 #if defined(PETSC_USE_CTABLE) 1285 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1286 #else 1287 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1288 #endif 1289 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1290 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1291 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1292 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1293 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1294 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1295 1296 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1311 { 1312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1313 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1314 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1315 PetscErrorCode ierr; 1316 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1317 int fd; 1318 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1319 PetscInt 
nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1320 PetscScalar *column_values; 1321 PetscInt message_count,flowcontrolcount; 1322 FILE *file; 1323 1324 PetscFunctionBegin; 1325 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1326 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1327 nz = A->nz + B->nz; 1328 if (!rank) { 1329 header[0] = MAT_FILE_CLASSID; 1330 header[1] = mat->rmap->N; 1331 header[2] = mat->cmap->N; 1332 1333 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1334 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1335 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1336 /* get largest number of rows any processor has */ 1337 rlen = mat->rmap->n; 1338 range = mat->rmap->range; 1339 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1340 } else { 1341 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1342 rlen = mat->rmap->n; 1343 } 1344 1345 /* load up the local row counts */ 1346 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1347 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1348 1349 /* store the row lengths to the file */ 1350 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1351 if (!rank) { 1352 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1353 for (i=1; i<size; i++) { 1354 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1355 rlen = range[i+1] - range[i]; 1356 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1357 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1358 } 1359 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1360 } else { 1361 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1362 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1363 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1364 } 1365 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1366 1367 /* load up the local column indices */ 1368 nzmax = nz; /* th processor needs space a largest processor needs */ 1369 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1370 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1371 cnt = 0; 1372 for (i=0; i<mat->rmap->n; i++) { 1373 for (j=B->i[i]; j<B->i[i+1]; j++) { 1374 if ((col = garray[B->j[j]]) > cstart) break; 1375 column_indices[cnt++] = col; 1376 } 1377 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1378 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1379 } 1380 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1381 1382 /* store the column indices to the file */ 1383 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1384 if (!rank) { 1385 MPI_Status status; 1386 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1387 for (i=1; i<size; i++) { 1388 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1389 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1390 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1391 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1392 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1393 } 1394 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1395 } else { 1396 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1397 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1399 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1400 } 1401 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1402 1403 /* load up the local column values */ 1404 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1405 cnt = 0; 1406 for (i=0; i<mat->rmap->n; i++) { 1407 for (j=B->i[i]; j<B->i[i+1]; j++) { 1408 if (garray[B->j[j]] > cstart) break; 1409 column_values[cnt++] = B->a[j]; 1410 } 1411 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1412 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1413 } 1414 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1415 1416 /* store the column values to the file */ 1417 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1418 if (!rank) { 1419 MPI_Status status; 1420 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1421 for (i=1; i<size; i++) { 1422 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1423 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1424 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1425 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1426 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1427 } 1428 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1429 } else { 1430 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1431 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1433 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1434 } 1435 ierr = PetscFree(column_values);CHKERRQ(ierr); 1436 1437 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1438 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #include <petscdraw.h> 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1445 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1446 { 1447 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1448 PetscErrorCode ierr; 1449 PetscMPIInt rank = aij->rank,size = aij->size; 1450 PetscBool isdraw,iascii,isbinary; 1451 
PetscViewer sviewer; 1452 PetscViewerFormat format; 1453 1454 PetscFunctionBegin; 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1457 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1458 if (iascii) { 1459 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1460 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1461 MatInfo info; 1462 PetscBool inodes; 1463 1464 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1465 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1466 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1468 if (!inodes) { 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1470 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1471 } else { 1472 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1473 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1474 } 1475 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1477 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1479 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1481 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1482 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1485 PetscInt inodecount,inodelimit,*inodes; 1486 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1487 if (inodes) { 1488 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1489 } else { 1490 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1491 } 1492 PetscFunctionReturn(0); 1493 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1494 PetscFunctionReturn(0); 1495 } 1496 } else if (isbinary) { 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1502 } 1503 PetscFunctionReturn(0); 1504 } else if (isdraw) { 1505 PetscDraw draw; 1506 PetscBool isnull; 1507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1509 } 1510 1511 if (size == 1) { 1512 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1513 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1514 } else { 1515 /* assemble the entire matrix onto first processor. 
*/ 1516 Mat A; 1517 Mat_SeqAIJ *Aloc; 1518 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1519 MatScalar *a; 1520 1521 if (mat->rmap->N > 1024) { 1522 PetscBool flg = PETSC_FALSE; 1523 1524 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1525 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1526 } 1527 1528 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1529 if (!rank) { 1530 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1531 } else { 1532 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1533 } 1534 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1535 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1536 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1537 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1538 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1539 1540 /* copy over the A part */ 1541 Aloc = (Mat_SeqAIJ*)aij->A->data; 1542 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1545 for (i=0; i<m; i++) { 1546 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1547 row++; 1548 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1549 } 1550 aj = Aloc->j; 1551 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1552 1553 /* copy over the B part */ 1554 Aloc = (Mat_SeqAIJ*)aij->B->data; 1555 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1556 row = mat->rmap->rstart; 1557 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1558 ct = cols; 1559 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1560 for (i=0; i<m; i++) { 1561 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1562 row++; 1563 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1564 } 1565 ierr = PetscFree(ct);CHKERRQ(ierr); 1566 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1568 /* 1569 Everyone has to call to draw the matrix since the graphics waits are 1570 synchronized across all processors that share the PetscDraw object 1571 */ 1572 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 if (!rank) { 1574 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1575 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1576 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1577 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1578 } 1579 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1580 ierr = MatDestroy(&A);CHKERRQ(ierr); 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatView_MPIAIJ" 1587 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1588 { 1589 PetscErrorCode ierr; 1590 PetscBool iascii,isdraw,issocket,isbinary; 1591 1592 PetscFunctionBegin; 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1594 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1596 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1597 if (iascii || isdraw || isbinary || issocket) { 1598 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1599 } 1600 PetscFunctionReturn(0); 1601 } 1602 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "MatSOR_MPIAIJ" 1605 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1606 { 1607 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1608 PetscErrorCode ierr; 1609 Vec bb1 = 0; 1610 PetscBool hasop; 1611 1612 PetscFunctionBegin; 1613 if (flag == SOR_APPLY_UPPER) { 1614 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1619 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1620 } 1621 1622 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1623 if (flag & SOR_ZERO_INITIAL_GUESS) { 1624 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1625 its--; 1626 } 1627 1628 while (its--) { 1629 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1631 1632 /* update rhs: bb1 = bb - B*x */ 1633 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1634 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1635 1636 /* local sweep */ 1637 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1638 } 1639 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 while (its--) { 1645 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 1648 /* update rhs: bb1 = bb - B*x */ 1649 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1650 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1651 1652 /* local sweep */ 1653 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1654 } 1655 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1656 if (flag & SOR_ZERO_INITIAL_GUESS) { 1657 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1658 its--; 1659 } 1660 while (its--) { 1661 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 1664 /* update rhs: bb1 = bb - B*x */ 1665 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1666 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1667 1668 /* local sweep */ 1669 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1670 } 1671 } else if (flag & SOR_EISENSTAT) { 1672 Vec xx1; 1673 1674 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1675 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1676 
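/*
   Illustrative usage sketch (not part of the original source): MatSOR_MPIAIJ() is normally reached
   through MatSOR() on an assembled MATMPIAIJ matrix, either called directly or driven by PCSOR inside
   a KSP. The SOR_LOCAL_ flags request sweeps over the on-process (diagonal) block only, with the
   off-process coupling folded into the right-hand side as in the loops above. A minimal direct call,
   assuming Auser, buser and xuser are an assembled parallel matrix and conforming vectors, could be:

     ierr = MatSOR(Auser,buser,1.0,
                   (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                   0.0,      zero diagonal shift (fshift)
                   2,        two outer (parallel) iterations (its)
                   1,        one local sweep per outer iteration (lits)
                   xuser);CHKERRQ(ierr);

   MatSOR() and the MatSORType flags are standard PETSc API; omega = 1.0 and the iteration counts are
   arbitrary values chosen for the sketch. The same behaviour is reachable from the command line with
   -pc_type sor and its -pc_sor_ options.
*/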
1677 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 if (!mat->diag) { 1680 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1681 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1682 } 1683 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1684 if (hasop) { 1685 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1686 } else { 1687 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1688 } 1689 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1690 1691 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1692 1693 /* local sweep */ 1694 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1695 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1696 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1697 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1698 1699 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatPermute_MPIAIJ" 1705 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1706 { 1707 Mat aA,aB,Aperm; 1708 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1709 PetscScalar *aa,*ba; 1710 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1711 PetscSF rowsf,sf; 1712 IS parcolp = NULL; 1713 PetscBool done; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1718 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1719 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1720 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1721 1722 /* Invert row permutation to find out where my rows should go */ 1723 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1724 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1725 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1726 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1727 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1728 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1729 1730 /* Invert column permutation to find out where my columns should go */ 1731 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1732 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1733 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1734 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1735 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1737 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1738 1739 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1740 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1741 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1742 1743 /* Find out where my gcols should go */ 1744 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1745 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1746 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1747 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1748 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1749 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1751 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1752 1753 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1754 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1757 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1758 for (i=0; i<m; i++) { 1759 PetscInt row = rdest[i],rowner; 1760 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1761 for (j=ai[i]; j<ai[i+1]; j++) { 1762 PetscInt cowner,col = cdest[aj[j]]; 1763 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1764 if (rowner == cowner) dnnz[i]++; 1765 else onnz[i]++; 1766 } 1767 for (j=bi[i]; j<bi[i+1]; j++) { 1768 PetscInt cowner,col = gcdest[bj[j]]; 1769 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1770 if (rowner == cowner) dnnz[i]++; 1771 else onnz[i]++; 1772 } 1773 } 1774 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1776 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1780 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1781 1782 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1784 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1785 for (i=0; i<m; i++) { 1786 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1787 PetscInt rowlen; 1788 rowlen = ai[i+1] - ai[i]; 1789 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1790 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1791 rowlen = bi[i+1] - bi[i]; 1792 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1793 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1794 } 1795 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1798 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1800 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1801 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1802 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1803 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1804 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1805 *B = Aperm; 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1811 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1812 { 1813 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1814 Mat A = mat->A,B = mat->B; 1815 PetscErrorCode ierr; 1816 PetscReal isend[5],irecv[5]; 1817 1818 PetscFunctionBegin; 1819 
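/*
   Illustrative usage sketch (not part of the original source): this routine sums the statistics of the
   local diagonal (A) and off-diagonal (B) blocks and, for MAT_GLOBAL_MAX or MAT_GLOBAL_SUM, reduces them
   over the matrix communicator. A caller would typically go through MatGetInfo(), for example:

     MatInfo info;
     ierr = MatGetInfo(Auser,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);

   MatGetInfo(), MatInfo and the MAT_GLOBAL_ flags are standard PETSc API; Auser is assumed to be an
   assembled MATMPIAIJ matrix.
*/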
info->block_size = 1.0; 1820 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1821 1822 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1823 isend[3] = info->memory; isend[4] = info->mallocs; 1824 1825 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1826 1827 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1828 isend[3] += info->memory; isend[4] += info->mallocs; 1829 if (flag == MAT_LOCAL) { 1830 info->nz_used = isend[0]; 1831 info->nz_allocated = isend[1]; 1832 info->nz_unneeded = isend[2]; 1833 info->memory = isend[3]; 1834 info->mallocs = isend[4]; 1835 } else if (flag == MAT_GLOBAL_MAX) { 1836 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1837 1838 info->nz_used = irecv[0]; 1839 info->nz_allocated = irecv[1]; 1840 info->nz_unneeded = irecv[2]; 1841 info->memory = irecv[3]; 1842 info->mallocs = irecv[4]; 1843 } else if (flag == MAT_GLOBAL_SUM) { 1844 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1845 1846 info->nz_used = irecv[0]; 1847 info->nz_allocated = irecv[1]; 1848 info->nz_unneeded = irecv[2]; 1849 info->memory = irecv[3]; 1850 info->mallocs = irecv[4]; 1851 } 1852 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1853 info->fill_ratio_needed = 0; 1854 info->factor_mallocs = 0; 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatSetOption_MPIAIJ" 1860 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1861 { 1862 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1863 PetscErrorCode ierr; 1864 1865 PetscFunctionBegin; 1866 switch (op) { 1867 case MAT_NEW_NONZERO_LOCATIONS: 1868 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1869 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1870 case MAT_KEEP_NONZERO_PATTERN: 1871 case MAT_NEW_NONZERO_LOCATION_ERR: 1872 case MAT_USE_INODES: 1873 case MAT_IGNORE_ZERO_ENTRIES: 1874 MatCheckPreallocated(A,1); 1875 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1876 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1877 break; 1878 case MAT_ROW_ORIENTED: 1879 a->roworiented = flg; 1880 1881 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1882 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1883 break; 1884 case MAT_NEW_DIAGONALS: 1885 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1886 break; 1887 case MAT_IGNORE_OFF_PROC_ENTRIES: 1888 a->donotstash = flg; 1889 break; 1890 case MAT_SPD: 1891 A->spd_set = PETSC_TRUE; 1892 A->spd = flg; 1893 if (flg) { 1894 A->symmetric = PETSC_TRUE; 1895 A->structurally_symmetric = PETSC_TRUE; 1896 A->symmetric_set = PETSC_TRUE; 1897 A->structurally_symmetric_set = PETSC_TRUE; 1898 } 1899 break; 1900 case MAT_SYMMETRIC: 1901 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1902 break; 1903 case MAT_STRUCTURALLY_SYMMETRIC: 1904 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1905 break; 1906 case MAT_HERMITIAN: 1907 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1908 break; 1909 case MAT_SYMMETRY_ETERNAL: 1910 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1911 break; 1912 default: 1913 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatGetRow_MPIAIJ" 1920 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1921 { 1922 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1923 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1924 PetscErrorCode ierr; 1925 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1926 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1927 PetscInt *cmap,*idx_p; 1928 1929 PetscFunctionBegin; 1930 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1931 mat->getrowactive = PETSC_TRUE; 1932 1933 if (!mat->rowvalues && (idx || v)) { 1934 /* 1935 allocate enough space to hold information from the longest row. 1936 */ 1937 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1938 PetscInt max = 1,tmp; 1939 for (i=0; i<matin->rmap->n; i++) { 1940 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1941 if (max < tmp) max = tmp; 1942 } 1943 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1944 } 1945 1946 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1947 lrow = row - rstart; 1948 1949 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1950 if (!v) {pvA = 0; pvB = 0;} 1951 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1952 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1953 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1954 nztot = nzA + nzB; 1955 1956 cmap = mat->garray; 1957 if (v || idx) { 1958 if (nztot) { 1959 /* Sort by increasing column numbers, assuming A and B already sorted */ 1960 PetscInt imark = -1; 1961 if (v) { 1962 *v = v_p = mat->rowvalues; 1963 for (i=0; i<nzB; i++) { 1964 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1965 else break; 1966 } 1967 imark = i; 1968 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1969 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1970 } 1971 if (idx) { 1972 *idx = idx_p = mat->rowindices; 1973 if (imark > -1) { 1974 for (i=0; i<imark; i++) { 1975 idx_p[i] = cmap[cworkB[i]]; 1976 } 1977 } else { 1978 for (i=0; i<nzB; i++) { 1979 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1980 else break; 1981 } 1982 imark = i; 1983 } 1984 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1985 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1986 } 1987 } else { 1988 if (idx) *idx = 0; 1989 if (v) *v = 0; 1990 } 1991 } 1992 *nz = nztot; 1993 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1994 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2000 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2001 { 2002 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2003 2004 PetscFunctionBegin; 2005 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2006 aij->getrowactive = PETSC_FALSE; 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatNorm_MPIAIJ" 2012 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2013 { 2014 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2015 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2016 PetscErrorCode ierr; 2017 PetscInt i,j,cstart = mat->cmap->rstart; 2018 PetscReal sum = 0.0; 2019 MatScalar *v; 2020 2021 PetscFunctionBegin; 2022 if (aij->size == 1) { 2023 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2024 } else { 2025 if (type == NORM_FROBENIUS) { 2026 v = 
amat->a; 2027 for (i=0; i<amat->nz; i++) { 2028 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2029 } 2030 v = bmat->a; 2031 for (i=0; i<bmat->nz; i++) { 2032 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2033 } 2034 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2035 *norm = PetscSqrtReal(*norm); 2036 } else if (type == NORM_1) { /* max column norm */ 2037 PetscReal *tmp,*tmp2; 2038 PetscInt *jj,*garray = aij->garray; 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2040 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2041 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2042 *norm = 0.0; 2043 v = amat->a; jj = amat->j; 2044 for (j=0; j<amat->nz; j++) { 2045 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2046 } 2047 v = bmat->a; jj = bmat->j; 2048 for (j=0; j<bmat->nz; j++) { 2049 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2050 } 2051 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2052 for (j=0; j<mat->cmap->N; j++) { 2053 if (tmp2[j] > *norm) *norm = tmp2[j]; 2054 } 2055 ierr = PetscFree(tmp);CHKERRQ(ierr); 2056 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2057 } else if (type == NORM_INFINITY) { /* max row norm */ 2058 PetscReal ntemp = 0.0; 2059 for (j=0; j<aij->A->rmap->n; j++) { 2060 v = amat->a + amat->i[j]; 2061 sum = 0.0; 2062 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2063 sum += PetscAbsScalar(*v); v++; 2064 } 2065 v = bmat->a + bmat->i[j]; 2066 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2067 sum += PetscAbsScalar(*v); v++; 2068 } 2069 if (sum > ntemp) ntemp = sum; 2070 } 2071 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2072 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatTranspose_MPIAIJ" 2079 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2080 { 2081 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2082 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2083 PetscErrorCode ierr; 2084 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2085 PetscInt cstart = A->cmap->rstart,ncol; 2086 Mat B; 2087 MatScalar *array; 2088 2089 PetscFunctionBegin; 2090 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2091 2092 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2093 ai = Aloc->i; aj = Aloc->j; 2094 bi = Bloc->i; bj = Bloc->j; 2095 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2096 PetscInt *d_nnz,*g_nnz,*o_nnz; 2097 PetscSFNode *oloc; 2098 PETSC_UNUSED PetscSF sf; 2099 2100 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2101 /* compute d_nnz for preallocation */ 2102 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2103 for (i=0; i<ai[ma]; i++) { 2104 d_nnz[aj[i]]++; 2105 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2106 } 2107 /* compute local off-diagonal contributions */ 2108 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2109 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2110 /* map those to global */ 2111 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2112 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2113 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2114 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2115 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2117 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2118 2119 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2120 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2121 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2122 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2123 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2124 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2125 } else { 2126 B = *matout; 2127 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2128 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2129 } 2130 2131 /* copy over the A part */ 2132 array = Aloc->a; 2133 row = A->rmap->rstart; 2134 for (i=0; i<ma; i++) { 2135 ncol = ai[i+1]-ai[i]; 2136 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2137 row++; 2138 array += ncol; aj += ncol; 2139 } 2140 aj = Aloc->j; 2141 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2142 2143 /* copy over the B part */ 2144 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2145 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2146 array = Bloc->a; 2147 row = A->rmap->rstart; 2148 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2149 cols_tmp = cols; 2150 for (i=0; i<mb; i++) { 2151 ncol = bi[i+1]-bi[i]; 2152 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2153 row++; 2154 array += ncol; cols_tmp += ncol; 2155 } 2156 ierr = PetscFree(cols);CHKERRQ(ierr); 2157 2158 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2160 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2161 *matout = B; 2162 } else { 2163 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2170 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2171 { 2172 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2173 Mat a = aij->A,b = aij->B; 2174 PetscErrorCode ierr; 2175 PetscInt s1,s2,s3; 2176 2177 PetscFunctionBegin; 2178 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2179 if (rr) { 2180 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2181 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2182 /* Overlap communication with computation. 
*/ 2183 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2184 } 2185 if (ll) { 2186 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2187 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2188 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2189 } 2190 /* scale the diagonal block */ 2191 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2192 2193 if (rr) { 2194 /* Do a scatter end and then right scale the off-diagonal block */ 2195 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2196 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2197 } 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2203 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2204 { 2205 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2206 PetscErrorCode ierr; 2207 2208 PetscFunctionBegin; 2209 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatEqual_MPIAIJ" 2215 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2216 { 2217 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2218 Mat a,b,c,d; 2219 PetscBool flg; 2220 PetscErrorCode ierr; 2221 2222 PetscFunctionBegin; 2223 a = matA->A; b = matA->B; 2224 c = matB->A; d = matB->B; 2225 2226 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2227 if (flg) { 2228 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2229 } 2230 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 #undef __FUNCT__ 2235 #define __FUNCT__ "MatCopy_MPIAIJ" 2236 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2237 { 2238 PetscErrorCode ierr; 2239 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2240 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2241 2242 PetscFunctionBegin; 2243 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2244 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2245 /* because of the column compression in the off-processor part of the matrix a->B, 2246 the number of columns in a->B and b->B may be different, hence we cannot call 2247 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2248 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2249 then copying the submatrices */ 2250 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2251 } else { 2252 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2253 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2254 } 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatSetUp_MPIAIJ" 2260 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2261 { 2262 PetscErrorCode ierr; 2263 2264 PetscFunctionBegin; 2265 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2271 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2272 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2273 { 2274 PetscInt i,m=Y->rmap->N; 2275 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2276 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2277 const PetscInt *xi = x->i,*yi = y->i; 2278 2279 PetscFunctionBegin; 2280 /* Set the number of nonzeros in the new matrix */ 2281 for (i=0; i<m; i++) { 2282 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2283 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2284 nnz[i] = 0; 2285 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2286 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2287 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2288 nnz[i]++; 2289 } 2290 for (; k<nzy; k++) nnz[i]++; 2291 } 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatAXPY_MPIAIJ" 2297 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2298 { 2299 PetscErrorCode ierr; 2300 PetscInt i; 2301 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2302 PetscBLASInt bnz,one=1; 2303 Mat_SeqAIJ *x,*y; 2304 2305 PetscFunctionBegin; 2306 if (str == SAME_NONZERO_PATTERN) { 2307 PetscScalar alpha = a; 2308 x = (Mat_SeqAIJ*)xx->A->data; 2309 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2310 y = (Mat_SeqAIJ*)yy->A->data; 2311 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2312 x = (Mat_SeqAIJ*)xx->B->data; 2313 y = (Mat_SeqAIJ*)yy->B->data; 2314 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2315 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2316 } else if (str == SUBSET_NONZERO_PATTERN) { 2317 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2318 2319 x = (Mat_SeqAIJ*)xx->B->data; 2320 y = (Mat_SeqAIJ*)yy->B->data; 2321 if (y->xtoy && y->XtoY != xx->B) { 2322 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2323 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2324 } 2325 if (!y->xtoy) { /* get xtoy */ 2326 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2327 y->XtoY = xx->B; 2328 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2329 } 2330 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2331 } else { 2332 Mat B; 2333 PetscInt *nnz_d,*nnz_o; 2334 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2335 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2336 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2337 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2338 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2339 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2340 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2342 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2343 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2344 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2345 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2347 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2348 } 2349 PetscFunctionReturn(0); 2350 } 2351 2352 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2353 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "MatConjugate_MPIAIJ" 2356 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2357 { 2358 #if defined(PETSC_USE_COMPLEX) 2359 PetscErrorCode ierr; 2360 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2361 2362 PetscFunctionBegin; 2363 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2364 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2365 #else 2366 PetscFunctionBegin; 2367 #endif 2368 PetscFunctionReturn(0); 2369 } 2370 2371 #undef __FUNCT__ 2372 #define __FUNCT__ "MatRealPart_MPIAIJ" 2373 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2374 { 2375 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2376 PetscErrorCode ierr; 2377 2378 PetscFunctionBegin; 2379 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2380 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2386 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2387 { 2388 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2393 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #if defined(PETSC_HAVE_PBGL) 2398 2399 #include <boost/parallel/mpi/bsp_process_group.hpp> 2400 #include <boost/graph/distributed/ilu_default_graph.hpp> 2401 #include <boost/graph/distributed/ilu_0_block.hpp> 2402 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2403 #include <boost/graph/distributed/petsc/interface.hpp> 2404 #include <boost/multi_array.hpp> 2405 #include <boost/parallel/distributed_property_map->hpp> 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2409 /* 2410 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2411 */ 2412 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2413 { 2414 namespace petsc = boost::distributed::petsc; 2415 2416 namespace graph_dist = boost::graph::distributed; 2417 using boost::graph::distributed::ilu_default::process_group_type; 2418 using boost::graph::ilu_permuted; 2419 2420 PetscBool row_identity, col_identity; 2421 PetscContainer c; 2422 PetscInt m, n, M, N; 2423 PetscErrorCode ierr; 2424 2425 PetscFunctionBegin; 2426 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2427 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2428 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2429 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2430 2431 process_group_type pg; 2432 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2433 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2434 lgraph_type& level_graph = *lgraph_p; 2435 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2436 2437 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2438 ilu_permuted(level_graph); 2439 2440 /* put together the new matrix */ 2441 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2442 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2443 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2444 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2445 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2446 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2447 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2449 2450 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2451 ierr = PetscContainerSetPointer(c, lgraph_p); 2452 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2453 ierr = PetscContainerDestroy(&c); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 #undef __FUNCT__ 2458 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2459 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2460 { 2461 PetscFunctionBegin; 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatSolve_MPIAIJ" 2467 /* 2468 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2469 */ 2470 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2471 { 2472 namespace graph_dist = boost::graph::distributed; 2473 2474 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2475 lgraph_type *lgraph_p; 2476 PetscContainer c; 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2481 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2482 ierr = VecCopy(b, x);CHKERRQ(ierr); 2483 2484 PetscScalar *array_x; 2485 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2486 PetscInt sx; 2487 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2488 2489 PetscScalar *array_b; 2490 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2491 PetscInt sb; 2492 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2493 2494 lgraph_type& level_graph = *lgraph_p; 2495 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2496 2497 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2498 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2499 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2500 2501 typedef boost::iterator_property_map<array_ref_type::iterator, 2502 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2503 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2504 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2505 2506 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2507 PetscFunctionReturn(0); 2508 } 2509 #endif 2510 2511 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2512 PetscInt nzlocal,nsends,nrecvs; 2513 PetscMPIInt *send_rank,*recv_rank; 2514 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2515 PetscScalar *sbuf_a,**rbuf_a; 2516 PetscErrorCode (*Destroy)(Mat); 2517 } Mat_Redundant; 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2521 
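/*
   Illustrative sketch (not part of the original source): the Mat_Redundant struct above is stashed on the
   redundant matrix through a PetscContainer so that MatGetRedundantMatrix_MPIAIJ() can find its
   communication buffers again on a MAT_REUSE_MATRIX call, and PetscContainerDestroy_MatRedundant() below
   is the callback that frees them. The generic attach/query pattern, with a hypothetical context type
   MyCtx and destroy routine MyCtxDestroy(), looks like:

     PetscContainer container;
     MyCtx          *ctx;

     attach a context to a matrix C:
       ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
       ierr = PetscContainerSetPointer(container,ctx);CHKERRQ(ierr);
       ierr = PetscContainerSetUserDestroy(container,MyCtxDestroy);CHKERRQ(ierr);
       ierr = PetscObjectCompose((PetscObject)C,"MyCtx",(PetscObject)container);CHKERRQ(ierr);
       ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);      the matrix now holds the only reference

     recover it later:
       ierr = PetscObjectQuery((PetscObject)C,"MyCtx",(PetscObject*)&container);CHKERRQ(ierr);
       ierr = PetscContainerGetPointer(container,(void**)&ctx);CHKERRQ(ierr);

   PetscContainerCreate(), PetscContainerSetPointer(), PetscContainerSetUserDestroy(), PetscObjectCompose()
   and PetscObjectQuery() are standard PETSc API; MyCtx, MyCtxDestroy and the key string "MyCtx" are
   invented names for the sketch.
*/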
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2522 { 2523 PetscErrorCode ierr; 2524 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2525 PetscInt i; 2526 2527 PetscFunctionBegin; 2528 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2529 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2530 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2531 for (i=0; i<redund->nrecvs; i++) { 2532 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2533 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2534 } 2535 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2536 ierr = PetscFree(redund);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatDestroy_MatRedundant" 2542 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2543 { 2544 PetscErrorCode ierr; 2545 PetscContainer container; 2546 Mat_Redundant *redund=NULL; 2547 2548 PetscFunctionBegin; 2549 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2550 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2551 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2552 2553 A->ops->destroy = redund->Destroy; 2554 2555 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2556 if (A->ops->destroy) { 2557 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2558 } 2559 PetscFunctionReturn(0); 2560 } 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2564 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2565 { 2566 PetscMPIInt rank,size; 2567 MPI_Comm comm; 2568 PetscErrorCode ierr; 2569 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2570 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2571 PetscInt *rowrange = mat->rmap->range; 2572 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2573 Mat A = aij->A,B=aij->B,C=*matredundant; 2574 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2575 PetscScalar *sbuf_a; 2576 PetscInt nzlocal=a->nz+b->nz; 2577 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2578 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2579 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2580 MatScalar *aworkA,*aworkB; 2581 PetscScalar *vals; 2582 PetscMPIInt tag1,tag2,tag3,imdex; 2583 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2584 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2585 MPI_Status recv_status,*send_status; 2586 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2587 PetscInt **rbuf_j=NULL; 2588 PetscScalar **rbuf_a=NULL; 2589 Mat_Redundant *redund =NULL; 2590 PetscContainer container; 2591 2592 PetscFunctionBegin; 2593 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2594 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2595 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2596 2597 if (reuse == MAT_REUSE_MATRIX) { 2598 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2599 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2600 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2601 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2602 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2603 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2604 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2605 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2606 2607 nsends = redund->nsends; 2608 nrecvs = redund->nrecvs; 2609 send_rank = redund->send_rank; 2610 recv_rank = redund->recv_rank; 2611 sbuf_nz = redund->sbuf_nz; 2612 rbuf_nz = redund->rbuf_nz; 2613 sbuf_j = redund->sbuf_j; 2614 sbuf_a = redund->sbuf_a; 2615 rbuf_j = redund->rbuf_j; 2616 rbuf_a = redund->rbuf_a; 2617 } 2618 2619 if (reuse == MAT_INITIAL_MATRIX) { 2620 PetscMPIInt subrank,subsize; 2621 PetscInt nleftover,np_subcomm; 2622 /* get the destination processors' id send_rank, nsends and nrecvs */ 2623 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2624 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2625 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2626 2627 np_subcomm = size/nsubcomm; 2628 nleftover = size - nsubcomm*np_subcomm; 2629 2630 nsends = 0; nrecvs = 0; 2631 for (i=0; i<size; i++) { /* i=rank*/ 2632 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2633 send_rank[nsends] = i; nsends++; 2634 recv_rank[nrecvs++] = i; 2635 } 2636 } 2637 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2638 i = size-nleftover-1; 2639 j = 0; 2640 while (j < nsubcomm - nleftover) { 2641 send_rank[nsends++] = i; 2642 i--; j++; 2643 } 2644 } 2645 2646 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2647 for (i=0; i<nleftover; i++) { 2648 recv_rank[nrecvs++] = size-nleftover+i; 2649 } 2650 } 2651 2652 /* allocate sbuf_j, sbuf_a */ 2653 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2654 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2655 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2656 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2657 2658 /* copy mat's local entries into the buffers */ 2659 if (reuse == MAT_INITIAL_MATRIX) { 2660 rownz_max = 0; 2661 rptr = sbuf_j; 2662 cols = sbuf_j + rend-rstart + 1; 2663 vals = sbuf_a; 2664 rptr[0] = 0; 2665 for (i=0; i<rend-rstart; i++) { 2666 row = i + rstart; 2667 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2668 ncols = nzA + nzB; 2669 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2670 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2671 /* load the column indices for this row into cols */ 2672 lwrite = 0; 2673 for (l=0; l<nzB; l++) { 2674 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2675 vals[lwrite] = aworkB[l]; 2676 cols[lwrite++] = ctmp; 2677 } 2678 } 2679 for (l=0; l<nzA; l++) { 2680 vals[lwrite] = aworkA[l]; 2681 cols[lwrite++] = cstart + cworkA[l]; 2682 } 2683 for (l=0; l<nzB; l++) { 2684 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2685 vals[lwrite] = aworkB[l]; 2686 cols[lwrite++] = ctmp; 2687 } 2688 } 2689 vals += ncols; 2690 cols += ncols; 2691 rptr[i+1] = rptr[i] + ncols; 2692 if (rownz_max < ncols) rownz_max = ncols; 2693 } 2694 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2695 } else { /* only copy matrix values into sbuf_a */ 2696 rptr = sbuf_j; 2697 vals = sbuf_a; 2698 rptr[0] = 0; 2699 for (i=0; 
i<rend-rstart; i++) { 2700 row = i + rstart; 2701 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2702 ncols = nzA + nzB; 2703 cworkB = b->j + b->i[i]; 2704 aworkA = a->a + a->i[i]; 2705 aworkB = b->a + b->i[i]; 2706 lwrite = 0; 2707 for (l=0; l<nzB; l++) { 2708 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2709 } 2710 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2711 for (l=0; l<nzB; l++) { 2712 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2713 } 2714 vals += ncols; 2715 rptr[i+1] = rptr[i] + ncols; 2716 } 2717 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2718 2719 /* send nzlocal to others, and recv other's nzlocal */ 2720 /*--------------------------------------------------*/ 2721 if (reuse == MAT_INITIAL_MATRIX) { 2722 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2723 2724 s_waits2 = s_waits3 + nsends; 2725 s_waits1 = s_waits2 + nsends; 2726 r_waits1 = s_waits1 + nsends; 2727 r_waits2 = r_waits1 + nrecvs; 2728 r_waits3 = r_waits2 + nrecvs; 2729 } else { 2730 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2731 2732 r_waits3 = s_waits3 + nsends; 2733 } 2734 2735 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2736 if (reuse == MAT_INITIAL_MATRIX) { 2737 /* get new tags to keep the communication clean */ 2738 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2739 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2740 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2741 2742 /* post receives of other's nzlocal */ 2743 for (i=0; i<nrecvs; i++) { 2744 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2745 } 2746 /* send nzlocal to others */ 2747 for (i=0; i<nsends; i++) { 2748 sbuf_nz[i] = nzlocal; 2749 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2750 } 2751 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2752 count = nrecvs; 2753 while (count) { 2754 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2755 2756 recv_rank[imdex] = recv_status.MPI_SOURCE; 2757 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2758 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2759 2760 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2761 2762 rbuf_nz[imdex] += i + 2; 2763 2764 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2765 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2766 count--; 2767 } 2768 /* wait on sends of nzlocal */ 2769 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2770 /* send mat->i,j to others, and recv from other's */ 2771 /*------------------------------------------------*/ 2772 for (i=0; i<nsends; i++) { 2773 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2774 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2775 } 2776 /* wait on receives of mat->i,j */ 2777 /*------------------------------*/ 2778 count = nrecvs; 2779 while (count) { 2780 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2781 if (recv_rank[imdex] != 
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2782 count--; 2783 } 2784 /* wait on sends of mat->i,j */ 2785 /*---------------------------*/ 2786 if (nsends) { 2787 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2788 } 2789 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2790 2791 /* post receives, send and receive mat->a */ 2792 /*----------------------------------------*/ 2793 for (imdex=0; imdex<nrecvs; imdex++) { 2794 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2795 } 2796 for (i=0; i<nsends; i++) { 2797 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2798 } 2799 count = nrecvs; 2800 while (count) { 2801 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2802 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2803 count--; 2804 } 2805 if (nsends) { 2806 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2807 } 2808 2809 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2810 2811 /* create redundant matrix */ 2812 /*-------------------------*/ 2813 if (reuse == MAT_INITIAL_MATRIX) { 2814 /* compute rownz_max for preallocation */ 2815 for (imdex=0; imdex<nrecvs; imdex++) { 2816 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2817 rptr = rbuf_j[imdex]; 2818 for (i=0; i<j; i++) { 2819 ncols = rptr[i+1] - rptr[i]; 2820 if (rownz_max < ncols) rownz_max = ncols; 2821 } 2822 } 2823 2824 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2825 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2826 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2827 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2828 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2829 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2830 } else { 2831 C = *matredundant; 2832 } 2833 2834 /* insert local matrix entries */ 2835 rptr = sbuf_j; 2836 cols = sbuf_j + rend-rstart + 1; 2837 vals = sbuf_a; 2838 for (i=0; i<rend-rstart; i++) { 2839 row = i + rstart; 2840 ncols = rptr[i+1] - rptr[i]; 2841 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2842 vals += ncols; 2843 cols += ncols; 2844 } 2845 /* insert received matrix entries */ 2846 for (imdex=0; imdex<nrecvs; imdex++) { 2847 rstart = rowrange[recv_rank[imdex]]; 2848 rend = rowrange[recv_rank[imdex]+1]; 2849 rptr = rbuf_j[imdex]; 2850 cols = rbuf_j[imdex] + rend-rstart + 1; 2851 vals = rbuf_a[imdex]; 2852 for (i=0; i<rend-rstart; i++) { 2853 row = i + rstart; 2854 ncols = rptr[i+1] - rptr[i]; 2855 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2856 vals += ncols; 2857 cols += ncols; 2858 } 2859 } 2860 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2863 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2864 if (reuse == MAT_INITIAL_MATRIX) { 2865 PetscContainer container; 2866 *matredundant = C; 2867 /* create a supporting struct and attach it to C for reuse */ 2868 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2869 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2870 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2871 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2872 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2873 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2874 2875 redund->nzlocal = nzlocal; 2876 redund->nsends = nsends; 2877 redund->nrecvs = nrecvs; 2878 redund->send_rank = send_rank; 2879 redund->recv_rank = recv_rank; 2880 redund->sbuf_nz = sbuf_nz; 2881 redund->rbuf_nz = rbuf_nz; 2882 redund->sbuf_j = sbuf_j; 2883 redund->sbuf_a = sbuf_a; 2884 redund->rbuf_j = rbuf_j; 2885 redund->rbuf_a = rbuf_a; 2886 2887 redund->Destroy = C->ops->destroy; 2888 C->ops->destroy = MatDestroy_MatRedundant; 2889 } 2890 PetscFunctionReturn(0); 2891 } 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2895 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2896 { 2897 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2898 PetscErrorCode ierr; 2899 PetscInt i,*idxb = 0; 2900 PetscScalar *va,*vb; 2901 Vec vtmp; 2902 2903 PetscFunctionBegin; 2904 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2905 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2906 if (idx) { 2907 for (i=0; i<A->rmap->n; i++) { 2908 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2909 } 2910 } 2911 2912 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2913 if (idx) { 2914 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2915 } 2916 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2917 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2918 2919 for (i=0; i<A->rmap->n; i++) { 2920 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2921 va[i] = vb[i]; 2922 if (idx) idx[i] = a->garray[idxb[i]]; 2923 } 2924 } 2925 2926 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2928 ierr = PetscFree(idxb);CHKERRQ(ierr); 2929 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2930 PetscFunctionReturn(0); 2931 } 2932 2933 #undef __FUNCT__ 2934 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2935 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2936 { 2937 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2938 PetscErrorCode ierr; 2939 PetscInt i,*idxb = 0; 2940 PetscScalar *va,*vb; 2941 Vec vtmp; 2942 2943 PetscFunctionBegin; 2944 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2945 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2946 if (idx) { 2947 for (i=0; i<A->cmap->n; i++) { 2948 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2949 } 2950 } 2951 2952 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2953 if (idx) { 2954 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2955 } 2956 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2957 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2958 2959 for (i=0; i<A->rmap->n; i++) { 2960 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2961 va[i] = vb[i]; 2962 if (idx) idx[i] = a->garray[idxb[i]]; 2963 } 2964 } 2965 2966 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2967 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2968 ierr = PetscFree(idxb);CHKERRQ(ierr); 2969 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2975 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2976 { 2977 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data; 2978 PetscInt n = A->rmap->n; 2979 PetscInt cstart = A->cmap->rstart; 2980 PetscInt *cmap = mat->garray; 2981 PetscInt *diagIdx, *offdiagIdx; 2982 Vec diagV, offdiagV; 2983 PetscScalar *a, *diagA, *offdiagA; 2984 PetscInt r; 2985 PetscErrorCode ierr; 2986 2987 PetscFunctionBegin; 2988 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2989 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2990 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2991 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2992 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2993 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2994 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2995 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2996 for (r = 0; r < n; ++r) { 2997 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2998 a[r] = diagA[r]; 2999 idx[r] = cstart + diagIdx[r]; 3000 } else { 3001 a[r] = offdiagA[r]; 3002 idx[r] = cmap[offdiagIdx[r]]; 3003 } 3004 } 3005 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3006 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3007 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3008 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3009 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3010 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3011 PetscFunctionReturn(0); 3012 } 3013 3014 #undef __FUNCT__ 3015 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3016 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3017 { 3018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3019 PetscInt n = A->rmap->n; 3020 PetscInt cstart = A->cmap->rstart; 3021 PetscInt *cmap = mat->garray; 3022 PetscInt *diagIdx, *offdiagIdx; 3023 Vec diagV, offdiagV; 3024 PetscScalar *a, *diagA, *offdiagA; 3025 PetscInt r; 3026 PetscErrorCode ierr; 3027 3028 PetscFunctionBegin; 3029 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3030 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3031 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3032 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3033 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3034 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3035 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3036 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3037 for (r = 0; r < n; ++r) { 3038 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3039 a[r] = diagA[r]; 3040 idx[r] = cstart + diagIdx[r]; 3041 } else { 3042 a[r] = offdiagA[r]; 3043 idx[r] = cmap[offdiagIdx[r]]; 3044 } 3045 } 3046 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3047 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3048 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3049 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3050 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3051 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 3055 #undef __FUNCT__ 3056 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3057 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3058 { 3059 PetscErrorCode ierr; 3060 Mat *dummy; 3061 3062 PetscFunctionBegin; 3063 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3064 *newmat = *dummy; 3065 ierr = PetscFree(dummy);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 extern PetscErrorCode 
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3073 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3074 { 3075 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3076 PetscErrorCode ierr; 3077 3078 PetscFunctionBegin; 3079 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3080 PetscFunctionReturn(0); 3081 } 3082 3083 #undef __FUNCT__ 3084 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3085 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3086 { 3087 PetscErrorCode ierr; 3088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3089 3090 PetscFunctionBegin; 3091 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3092 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3093 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3094 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3095 PetscFunctionReturn(0); 3096 } 3097 3098 /* -------------------------------------------------------------------*/ 3099 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3100 MatGetRow_MPIAIJ, 3101 MatRestoreRow_MPIAIJ, 3102 MatMult_MPIAIJ, 3103 /* 4*/ MatMultAdd_MPIAIJ, 3104 MatMultTranspose_MPIAIJ, 3105 MatMultTransposeAdd_MPIAIJ, 3106 #if defined(PETSC_HAVE_PBGL) 3107 MatSolve_MPIAIJ, 3108 #else 3109 0, 3110 #endif 3111 0, 3112 0, 3113 /*10*/ 0, 3114 0, 3115 0, 3116 MatSOR_MPIAIJ, 3117 MatTranspose_MPIAIJ, 3118 /*15*/ MatGetInfo_MPIAIJ, 3119 MatEqual_MPIAIJ, 3120 MatGetDiagonal_MPIAIJ, 3121 MatDiagonalScale_MPIAIJ, 3122 MatNorm_MPIAIJ, 3123 /*20*/ MatAssemblyBegin_MPIAIJ, 3124 MatAssemblyEnd_MPIAIJ, 3125 MatSetOption_MPIAIJ, 3126 MatZeroEntries_MPIAIJ, 3127 /*24*/ MatZeroRows_MPIAIJ, 3128 0, 3129 #if defined(PETSC_HAVE_PBGL) 3130 0, 3131 #else 3132 0, 3133 #endif 3134 0, 3135 0, 3136 /*29*/ MatSetUp_MPIAIJ, 3137 #if defined(PETSC_HAVE_PBGL) 3138 0, 3139 #else 3140 0, 3141 #endif 3142 0, 3143 0, 3144 0, 3145 /*34*/ MatDuplicate_MPIAIJ, 3146 0, 3147 0, 3148 0, 3149 0, 3150 /*39*/ MatAXPY_MPIAIJ, 3151 MatGetSubMatrices_MPIAIJ, 3152 MatIncreaseOverlap_MPIAIJ, 3153 MatGetValues_MPIAIJ, 3154 MatCopy_MPIAIJ, 3155 /*44*/ MatGetRowMax_MPIAIJ, 3156 MatScale_MPIAIJ, 3157 0, 3158 0, 3159 MatZeroRowsColumns_MPIAIJ, 3160 /*49*/ MatSetRandom_MPIAIJ, 3161 0, 3162 0, 3163 0, 3164 0, 3165 /*54*/ MatFDColoringCreate_MPIAIJ, 3166 0, 3167 MatSetUnfactored_MPIAIJ, 3168 MatPermute_MPIAIJ, 3169 0, 3170 /*59*/ MatGetSubMatrix_MPIAIJ, 3171 MatDestroy_MPIAIJ, 3172 MatView_MPIAIJ, 3173 0, 3174 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3175 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3176 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3177 0, 3178 0, 3179 0, 3180 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3181 MatGetRowMinAbs_MPIAIJ, 3182 0, 3183 MatSetColoring_MPIAIJ, 3184 0, 3185 MatSetValuesAdifor_MPIAIJ, 3186 /*75*/ MatFDColoringApply_AIJ, 3187 0, 3188 0, 3189 0, 3190 MatFindZeroDiagonals_MPIAIJ, 3191 /*80*/ 0, 3192 0, 3193 0, 3194 /*83*/ MatLoad_MPIAIJ, 3195 0, 3196 0, 3197 0, 3198 0, 3199 0, 3200 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3201 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3202 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3203 MatPtAP_MPIAIJ_MPIAIJ, 3204 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3205 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3206 0, 3207 0, 3208 0, 3209 0, 3210 /*99*/ 0, 3211 0, 3212 0, 3213 MatConjugate_MPIAIJ, 3214 0, 3215 /*104*/MatSetValuesRow_MPIAIJ, 3216 MatRealPart_MPIAIJ, 3217 MatImaginaryPart_MPIAIJ, 3218 0, 3219 0, 3220 /*109*/0, 3221 MatGetRedundantMatrix_MPIAIJ, 3222 MatGetRowMin_MPIAIJ, 3223 0, 3224 0, 
3225 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3226 0, 3227 0, 3228 0, 3229 0, 3230 /*119*/0, 3231 0, 3232 0, 3233 0, 3234 MatGetMultiProcBlock_MPIAIJ, 3235 /*124*/MatFindNonzeroRows_MPIAIJ, 3236 MatGetColumnNorms_MPIAIJ, 3237 MatInvertBlockDiagonal_MPIAIJ, 3238 0, 3239 MatGetSubMatricesParallel_MPIAIJ, 3240 /*129*/0, 3241 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3242 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3243 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3244 0, 3245 /*134*/0, 3246 0, 3247 0, 3248 0, 3249 0 3250 }; 3251 3252 /* ----------------------------------------------------------------------------------------*/ 3253 3254 #undef __FUNCT__ 3255 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3256 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3257 { 3258 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3259 PetscErrorCode ierr; 3260 3261 PetscFunctionBegin; 3262 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3263 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3264 PetscFunctionReturn(0); 3265 } 3266 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3269 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3270 { 3271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3272 PetscErrorCode ierr; 3273 3274 PetscFunctionBegin; 3275 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3276 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3277 PetscFunctionReturn(0); 3278 } 3279 3280 #undef __FUNCT__ 3281 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3282 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3283 { 3284 Mat_MPIAIJ *b; 3285 PetscErrorCode ierr; 3286 PetscInt i; 3287 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3288 3289 PetscFunctionBegin; 3290 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3291 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3292 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3293 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3294 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3295 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3296 3297 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3298 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3299 if (d_nnz) { 3300 for (i=0; i<B->rmap->n; i++) { 3301 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3302 } 3303 } 3304 if (o_nnz) { 3305 for (i=0; i<B->rmap->n; i++) { 3306 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3307 } 3308 } 3309 b = (Mat_MPIAIJ*)B->data; 3310 3311 if (!B->preallocated) { 3312 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3313 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3314 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3315 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3316 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3317 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3318 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3319 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3320 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3321 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3322 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3323 } 3324 3325 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3326 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3327 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3328 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3329 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3330 B->preallocated = PETSC_TRUE; 3331 PetscFunctionReturn(0); 3332 } 3333 3334 #undef __FUNCT__ 3335 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3336 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3337 { 3338 Mat mat; 3339 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3340 PetscErrorCode ierr; 3341 3342 PetscFunctionBegin; 3343 *newmat = 0; 3344 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3345 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3346 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3347 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3348 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3349 a = (Mat_MPIAIJ*)mat->data; 3350 3351 mat->factortype = matin->factortype; 3352 mat->rmap->bs = matin->rmap->bs; 3353 mat->cmap->bs = matin->cmap->bs; 3354 mat->assembled = PETSC_TRUE; 3355 mat->insertmode = NOT_SET_VALUES; 3356 mat->preallocated = PETSC_TRUE; 3357 3358 a->size = oldmat->size; 3359 a->rank = oldmat->rank; 3360 a->donotstash = oldmat->donotstash; 3361 a->roworiented = oldmat->roworiented; 3362 a->rowindices = 0; 3363 a->rowvalues = 0; 3364 a->getrowactive = PETSC_FALSE; 3365 3366 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3367 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3368 3369 if (oldmat->colmap) { 3370 #if defined(PETSC_USE_CTABLE) 3371 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3372 #else 3373 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3374 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3375 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3376 #endif 3377 } else a->colmap = 0; 3378 if (oldmat->garray) { 3379 PetscInt len; 3380 len = oldmat->B->cmap->n; 3381 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3382 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3383 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3384 } else a->garray = 0; 3385 3386 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3387 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3388 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3389 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3390 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3391 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3392 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3393 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3394 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3395 *newmat = mat; 3396 PetscFunctionReturn(0); 3397 } 3398 3399 3400 3401 #undef __FUNCT__ 3402 #define __FUNCT__ "MatLoad_MPIAIJ" 3403 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3404 { 3405 PetscScalar *vals,*svals; 3406 MPI_Comm comm; 3407 PetscErrorCode ierr; 3408 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3409 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3410 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3411 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3412 PetscInt cend,cstart,n,*rowners,sizesset=1; 3413 int fd; 3414 PetscInt bs = 1; 3415 3416 PetscFunctionBegin; 3417 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3418 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3419 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3420 if (!rank) { 3421 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3422 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3423 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3424 } 3425 3426 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3427 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3428 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3429 3430 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3431 3432 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3433 M = header[1]; N = header[2]; 3434 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3435 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3436 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3437 3438 /* If global sizes are set, check if they are consistent with that given in the file */ 3439 if (sizesset) { 3440 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3441 } 3442 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3443 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3444 3445 /* determine ownership of all (block) rows */ 3446 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3447 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3448 else m = newMat->rmap->n; /* Set by user */ 3449 3450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3451 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3452 3453 /* First process needs enough room for process with most rows */ 3454 if (!rank) { 3455 mmax = rowners[1]; 3456 for (i=2; 
i<=size; i++) { 3457 mmax = PetscMax(mmax, rowners[i]); 3458 } 3459 } else mmax = m; 3460 3461 rowners[0] = 0; 3462 for (i=2; i<=size; i++) { 3463 rowners[i] += rowners[i-1]; 3464 } 3465 rstart = rowners[rank]; 3466 rend = rowners[rank+1]; 3467 3468 /* distribute row lengths to all processors */ 3469 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3470 if (!rank) { 3471 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3472 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3473 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3474 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3475 for (j=0; j<m; j++) { 3476 procsnz[0] += ourlens[j]; 3477 } 3478 for (i=1; i<size; i++) { 3479 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3480 /* calculate the number of nonzeros on each processor */ 3481 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3482 procsnz[i] += rowlengths[j]; 3483 } 3484 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3485 } 3486 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3487 } else { 3488 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3489 } 3490 3491 if (!rank) { 3492 /* determine max buffer needed and allocate it */ 3493 maxnz = 0; 3494 for (i=0; i<size; i++) { 3495 maxnz = PetscMax(maxnz,procsnz[i]); 3496 } 3497 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3498 3499 /* read in my part of the matrix column indices */ 3500 nz = procsnz[0]; 3501 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3502 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3503 3504 /* read in every one elses and ship off */ 3505 for (i=1; i<size; i++) { 3506 nz = procsnz[i]; 3507 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3508 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3509 } 3510 ierr = PetscFree(cols);CHKERRQ(ierr); 3511 } else { 3512 /* determine buffer space needed for message */ 3513 nz = 0; 3514 for (i=0; i<m; i++) { 3515 nz += ourlens[i]; 3516 } 3517 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3518 3519 /* receive message of column indices*/ 3520 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3521 } 3522 3523 /* determine column ownership if matrix is not square */ 3524 if (N != M) { 3525 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3526 else n = newMat->cmap->n; 3527 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3528 cstart = cend - n; 3529 } else { 3530 cstart = rstart; 3531 cend = rend; 3532 n = cend - cstart; 3533 } 3534 3535 /* loop over local rows, determining number of off diagonal entries */ 3536 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3537 jj = 0; 3538 for (i=0; i<m; i++) { 3539 for (j=0; j<ourlens[i]; j++) { 3540 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3541 jj++; 3542 } 3543 } 3544 3545 for (i=0; i<m; i++) { 3546 ourlens[i] -= offlens[i]; 3547 } 3548 if (!sizesset) { 3549 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3550 } 3551 3552 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3553 3554 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3555 3556 for (i=0; i<m; i++) { 3557 ourlens[i] += offlens[i]; 3558 } 3559 3560 if (!rank) { 3561 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3562 3563 /* read in my part of the matrix 
numerical values */ 3564 nz = procsnz[0]; 3565 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3566 3567 /* insert into matrix */ 3568 jj = rstart; 3569 smycols = mycols; 3570 svals = vals; 3571 for (i=0; i<m; i++) { 3572 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3573 smycols += ourlens[i]; 3574 svals += ourlens[i]; 3575 jj++; 3576 } 3577 3578 /* read in other processors and ship out */ 3579 for (i=1; i<size; i++) { 3580 nz = procsnz[i]; 3581 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3582 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3583 } 3584 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3585 } else { 3586 /* receive numeric values */ 3587 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3588 3589 /* receive message of values*/ 3590 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3591 3592 /* insert into matrix */ 3593 jj = rstart; 3594 smycols = mycols; 3595 svals = vals; 3596 for (i=0; i<m; i++) { 3597 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3598 smycols += ourlens[i]; 3599 svals += ourlens[i]; 3600 jj++; 3601 } 3602 } 3603 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3604 ierr = PetscFree(vals);CHKERRQ(ierr); 3605 ierr = PetscFree(mycols);CHKERRQ(ierr); 3606 ierr = PetscFree(rowners);CHKERRQ(ierr); 3607 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3608 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3609 PetscFunctionReturn(0); 3610 } 3611 3612 #undef __FUNCT__ 3613 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3614 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3615 { 3616 PetscErrorCode ierr; 3617 IS iscol_local; 3618 PetscInt csize; 3619 3620 PetscFunctionBegin; 3621 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3622 if (call == MAT_REUSE_MATRIX) { 3623 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3624 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3625 } else { 3626 PetscInt cbs; 3627 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3628 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3629 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3630 } 3631 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3632 if (call == MAT_INITIAL_MATRIX) { 3633 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3634 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3635 } 3636 PetscFunctionReturn(0); 3637 } 3638 3639 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3640 #undef __FUNCT__ 3641 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3642 /* 3643 Not great since it makes two copies of the submatrix, first an SeqAIJ 3644 in local and then by concatenating the local matrices the end result. 3645 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3646 3647 Note: This requires a sequential iscol with all indices. 
3648 */ 3649 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3650 { 3651 PetscErrorCode ierr; 3652 PetscMPIInt rank,size; 3653 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3654 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3655 PetscBool allcolumns, colflag; 3656 Mat M,Mreuse; 3657 MatScalar *vwork,*aa; 3658 MPI_Comm comm; 3659 Mat_SeqAIJ *aij; 3660 3661 PetscFunctionBegin; 3662 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3663 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3664 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3665 3666 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3667 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3668 if (colflag && ncol == mat->cmap->N) { 3669 allcolumns = PETSC_TRUE; 3670 } else { 3671 allcolumns = PETSC_FALSE; 3672 } 3673 if (call == MAT_REUSE_MATRIX) { 3674 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3675 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3676 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3677 } else { 3678 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3679 } 3680 3681 /* 3682 m - number of local rows 3683 n - number of columns (same on all processors) 3684 rstart - first row in new global matrix generated 3685 */ 3686 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3687 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3688 if (call == MAT_INITIAL_MATRIX) { 3689 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3690 ii = aij->i; 3691 jj = aij->j; 3692 3693 /* 3694 Determine the number of non-zeros in the diagonal and off-diagonal 3695 portions of the matrix in order to do correct preallocation 3696 */ 3697 3698 /* first get start and end of "diagonal" columns */ 3699 if (csize == PETSC_DECIDE) { 3700 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3701 if (mglobal == n) { /* square matrix */ 3702 nlocal = m; 3703 } else { 3704 nlocal = n/size + ((n % size) > rank); 3705 } 3706 } else { 3707 nlocal = csize; 3708 } 3709 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3710 rstart = rend - nlocal; 3711 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3712 3713 /* next, compute all the lengths */ 3714 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3715 olens = dlens + m; 3716 for (i=0; i<m; i++) { 3717 jend = ii[i+1] - ii[i]; 3718 olen = 0; 3719 dlen = 0; 3720 for (j=0; j<jend; j++) { 3721 if (*jj < rstart || *jj >= rend) olen++; 3722 else dlen++; 3723 jj++; 3724 } 3725 olens[i] = olen; 3726 dlens[i] = dlen; 3727 } 3728 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3729 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3730 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3731 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3732 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3733 ierr = PetscFree(dlens);CHKERRQ(ierr); 3734 } else { 3735 PetscInt ml,nl; 3736 3737 M = *newmat; 3738 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3739 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3740 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3741 /* 
3742 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3743 rather than the slower MatSetValues(). 3744 */ 3745 M->was_assembled = PETSC_TRUE; 3746 M->assembled = PETSC_FALSE; 3747 } 3748 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3749 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3750 ii = aij->i; 3751 jj = aij->j; 3752 aa = aij->a; 3753 for (i=0; i<m; i++) { 3754 row = rstart + i; 3755 nz = ii[i+1] - ii[i]; 3756 cwork = jj; jj += nz; 3757 vwork = aa; aa += nz; 3758 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3759 } 3760 3761 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3762 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3763 *newmat = M; 3764 3765 /* save submatrix used in processor for next request */ 3766 if (call == MAT_INITIAL_MATRIX) { 3767 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3768 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3769 } 3770 PetscFunctionReturn(0); 3771 } 3772 3773 #undef __FUNCT__ 3774 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3775 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3776 { 3777 PetscInt m,cstart, cend,j,nnz,i,d; 3778 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3779 const PetscInt *JJ; 3780 PetscScalar *values; 3781 PetscErrorCode ierr; 3782 3783 PetscFunctionBegin; 3784 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3785 3786 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3787 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3788 m = B->rmap->n; 3789 cstart = B->cmap->rstart; 3790 cend = B->cmap->rend; 3791 rstart = B->rmap->rstart; 3792 3793 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3794 3795 #if defined(PETSC_USE_DEBUG) 3796 for (i=0; i<m; i++) { 3797 nnz = Ii[i+1]- Ii[i]; 3798 JJ = J + Ii[i]; 3799 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3800 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3801 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3802 } 3803 #endif 3804 3805 for (i=0; i<m; i++) { 3806 nnz = Ii[i+1]- Ii[i]; 3807 JJ = J + Ii[i]; 3808 nnz_max = PetscMax(nnz_max,nnz); 3809 d = 0; 3810 for (j=0; j<nnz; j++) { 3811 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3812 } 3813 d_nnz[i] = d; 3814 o_nnz[i] = nnz - d; 3815 } 3816 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3817 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3818 3819 if (v) values = (PetscScalar*)v; 3820 else { 3821 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3822 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3823 } 3824 3825 for (i=0; i<m; i++) { 3826 ii = i + rstart; 3827 nnz = Ii[i+1]- Ii[i]; 3828 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3829 } 3830 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3832 3833 if (!v) { 3834 ierr = PetscFree(values);CHKERRQ(ierr); 3835 } 3836 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3837 PetscFunctionReturn(0); 3838 } 3839 3840 #undef __FUNCT__ 3841 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3842 /*@ 3843 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3844 (the default parallel PETSc format). 3845 3846 Collective on MPI_Comm 3847 3848 Input Parameters: 3849 + B - the matrix 3850 . i - the indices into j for the start of each local row (starts with zero) 3851 . j - the column indices for each local row (starts with zero) 3852 - v - optional values in the matrix 3853 3854 Level: developer 3855 3856 Notes: 3857 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3858 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3859 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3860 3861 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3862 3863 The format which is used for the sparse matrix input, is equivalent to a 3864 row-major ordering.. i.e for the following matrix, the input data expected is 3865 as shown: 3866 3867 1 0 0 3868 2 0 3 P0 3869 ------- 3870 4 5 6 P1 3871 3872 Process0 [P0]: rows_owned=[0,1] 3873 i = {0,1,3} [size = nrow+1 = 2+1] 3874 j = {0,0,2} [size = nz = 6] 3875 v = {1,2,3} [size = nz = 6] 3876 3877 Process1 [P1]: rows_owned=[2] 3878 i = {0,3} [size = nrow+1 = 1+1] 3879 j = {0,1,2} [size = nz = 6] 3880 v = {4,5,6} [size = nz = 6] 3881 3882 .keywords: matrix, aij, compressed row, sparse, parallel 3883 3884 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3885 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3886 @*/ 3887 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3888 { 3889 PetscErrorCode ierr; 3890 3891 PetscFunctionBegin; 3892 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3893 PetscFunctionReturn(0); 3894 } 3895 3896 #undef __FUNCT__ 3897 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3898 /*@C 3899 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3900 (the default parallel PETSc format). For good matrix assembly performance 3901 the user should preallocate the matrix storage by setting the parameters 3902 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3903 performance can be increased by more than a factor of 50. 3904 3905 Collective on MPI_Comm 3906 3907 Input Parameters: 3908 + A - the matrix 3909 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3910 (same value is used for all local rows) 3911 . d_nnz - array containing the number of nonzeros in the various rows of the 3912 DIAGONAL portion of the local submatrix (possibly different for each row) 3913 or NULL, if d_nz is used to specify the nonzero structure. 3914 The size of this array is equal to the number of local rows, i.e 'm'. 
3915 For matrices that will be factored, you must leave room for (and set) 3916 the diagonal entry even if it is zero. 3917 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3918 submatrix (same value is used for all local rows). 3919 - o_nnz - array containing the number of nonzeros in the various rows of the 3920 OFF-DIAGONAL portion of the local submatrix (possibly different for 3921 each row) or NULL, if o_nz is used to specify the nonzero 3922 structure. The size of this array is equal to the number 3923 of local rows, i.e 'm'. 3924 3925 If the *_nnz parameter is given then the *_nz parameter is ignored 3926 3927 The AIJ format (also called the Yale sparse matrix format or 3928 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3929 storage. The stored row and column indices begin with zero. 3930 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3931 3932 The parallel matrix is partitioned such that the first m0 rows belong to 3933 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3934 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3935 3936 The DIAGONAL portion of the local submatrix of a processor can be defined 3937 as the submatrix which is obtained by extraction the part corresponding to 3938 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3939 first row that belongs to the processor, r2 is the last row belonging to 3940 the this processor, and c1-c2 is range of indices of the local part of a 3941 vector suitable for applying the matrix to. This is an mxn matrix. In the 3942 common case of a square matrix, the row and column ranges are the same and 3943 the DIAGONAL part is also square. The remaining portion of the local 3944 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3945 3946 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3947 3948 You can call MatGetInfo() to get information on how effective the preallocation was; 3949 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3950 You can also run with the option -info and look for messages with the string 3951 malloc in them to see if additional memory allocation was needed. 3952 3953 Example usage: 3954 3955 Consider the following 8x8 matrix with 34 non-zero values, that is 3956 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3957 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3958 as follows: 3959 3960 .vb 3961 1 2 0 | 0 3 0 | 0 4 3962 Proc0 0 5 6 | 7 0 0 | 8 0 3963 9 0 10 | 11 0 0 | 12 0 3964 ------------------------------------- 3965 13 0 14 | 15 16 17 | 0 0 3966 Proc1 0 18 0 | 19 20 21 | 0 0 3967 0 0 0 | 22 23 0 | 24 0 3968 ------------------------------------- 3969 Proc2 25 26 27 | 0 0 28 | 29 0 3970 30 0 0 | 31 32 33 | 0 34 3971 .ve 3972 3973 This can be represented as a collection of submatrices as: 3974 3975 .vb 3976 A B C 3977 D E F 3978 G H I 3979 .ve 3980 3981 Where the submatrices A,B,C are owned by proc0, D,E,F are 3982 owned by proc1, G,H,I are owned by proc2. 3983 3984 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3985 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3986 The 'M','N' parameters are 8,8, and have the same values on all procs. 3987 3988 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3989 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3990 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3991 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3992 part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ 3993 matrix and [DF] as another SeqAIJ matrix. 3994 3995 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3996 allocated for every row of the local diagonal submatrix, and o_nz 3997 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3998 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3999 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4000 In this case, the values of d_nz,o_nz are: 4001 .vb 4002 proc0 : d_nz = 2, o_nz = 2 4003 proc1 : d_nz = 3, o_nz = 2 4004 proc2 : d_nz = 1, o_nz = 4 4005 .ve 4006 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4007 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4008 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4009 34 values. 4010 4011 When the d_nnz, o_nnz parameters are specified, the storage is specified 4012 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4013 In the above case the values for d_nnz,o_nnz are: 4014 .vb 4015 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4016 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4017 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4018 .ve 4019 Here the space allocated is the sum of all the above values, i.e. 34, and 4020 hence the preallocation is exact. 4021 4022 Level: intermediate 4023 4024 .keywords: matrix, aij, compressed row, sparse, parallel 4025 4026 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4027 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4028 @*/ 4029 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4030 { 4031 PetscErrorCode ierr; 4032 4033 PetscFunctionBegin; 4034 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4035 PetscValidType(B,1); 4036 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4037 PetscFunctionReturn(0); 4038 } 4039 4040 #undef __FUNCT__ 4041 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4042 /*@ 4043 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 4044 in standard CSR format. 4045 4046 Collective on MPI_Comm 4047 4048 Input Parameters: 4049 + comm - MPI communicator 4050 . m - number of local rows (Cannot be PETSC_DECIDE) 4051 . n - This value should be the same as the local size used in creating the 4052 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4053 calculated if N is given) For square matrices n is almost always m. 4054 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4055 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4056 . i - row indices 4057 . j - column indices 4058 - a - matrix values 4059 4060 Output Parameter: 4061 . mat - the matrix 4062 4063 Level: intermediate 4064 4065 Notes: 4066 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4067 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4068 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
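   A minimal calling sketch (the local sizes and entries below are made up purely for
   illustration and are not part of the example matrix further down): a process that owns
   two local rows of a matrix with three global columns, one nonzero per row, could call

.vb
   PetscInt    i[] = {0,1,2};    /* row offsets into j[] and a[], length m+1 */
   PetscInt    j[] = {0,2};      /* global column indices of the nonzeros    */
   PetscScalar a[] = {1.0,3.0};  /* nonzero values                           */
   Mat         A;

   ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve
   Since the arrays are copied, i, j, and a may be modified or freed as soon as the call returns.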
4069 4070 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4071 4072 The format which is used for the sparse matrix input, is equivalent to a 4073 row-major ordering.. i.e for the following matrix, the input data expected is 4074 as shown: 4075 4076 1 0 0 4077 2 0 3 P0 4078 ------- 4079 4 5 6 P1 4080 4081 Process0 [P0]: rows_owned=[0,1] 4082 i = {0,1,3} [size = nrow+1 = 2+1] 4083 j = {0,0,2} [size = nz = 6] 4084 v = {1,2,3} [size = nz = 6] 4085 4086 Process1 [P1]: rows_owned=[2] 4087 i = {0,3} [size = nrow+1 = 1+1] 4088 j = {0,1,2} [size = nz = 6] 4089 v = {4,5,6} [size = nz = 6] 4090 4091 .keywords: matrix, aij, compressed row, sparse, parallel 4092 4093 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4094 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4095 @*/ 4096 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4097 { 4098 PetscErrorCode ierr; 4099 4100 PetscFunctionBegin; 4101 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4102 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4103 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4104 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4105 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4106 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4107 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4108 PetscFunctionReturn(0); 4109 } 4110 4111 #undef __FUNCT__ 4112 #define __FUNCT__ "MatCreateAIJ" 4113 /*@C 4114 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4115 (the default parallel PETSc format). For good matrix assembly performance 4116 the user should preallocate the matrix storage by setting the parameters 4117 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4118 performance can be increased by more than a factor of 50. 4119 4120 Collective on MPI_Comm 4121 4122 Input Parameters: 4123 + comm - MPI communicator 4124 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4125 This value should be the same as the local size used in creating the 4126 y vector for the matrix-vector product y = Ax. 4127 . n - This value should be the same as the local size used in creating the 4128 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4129 calculated if N is given) For square matrices n is almost always m. 4130 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4131 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4132 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4133 (same value is used for all local rows) 4134 . d_nnz - array containing the number of nonzeros in the various rows of the 4135 DIAGONAL portion of the local submatrix (possibly different for each row) 4136 or NULL, if d_nz is used to specify the nonzero structure. 4137 The size of this array is equal to the number of local rows, i.e 'm'. 4138 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4139 submatrix (same value is used for all local rows). 
4140 - o_nnz - array containing the number of nonzeros in the various rows of the 4141 OFF-DIAGONAL portion of the local submatrix (possibly different for 4142 each row) or NULL, if o_nz is used to specify the nonzero 4143 structure. The size of this array is equal to the number 4144 of local rows, i.e 'm'. 4145 4146 Output Parameter: 4147 . A - the matrix 4148 4149 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4150 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4151 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4152 4153 Notes: 4154 If the *_nnz parameter is given then the *_nz parameter is ignored 4155 4156 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4157 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4158 storage requirements for this matrix. 4159 4160 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4161 processor than it must be used on all processors that share the object for 4162 that argument. 4163 4164 The user MUST specify either the local or global matrix dimensions 4165 (possibly both). 4166 4167 The parallel matrix is partitioned across processors such that the 4168 first m0 rows belong to process 0, the next m1 rows belong to 4169 process 1, the next m2 rows belong to process 2 etc.. where 4170 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4171 values corresponding to [m x N] submatrix. 4172 4173 The columns are logically partitioned with the n0 columns belonging 4174 to 0th partition, the next n1 columns belonging to the next 4175 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4176 4177 The DIAGONAL portion of the local submatrix on any given processor 4178 is the submatrix corresponding to the rows and columns m,n 4179 corresponding to the given processor. i.e diagonal matrix on 4180 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4181 etc. The remaining portion of the local submatrix [m x (N-n)] 4182 constitute the OFF-DIAGONAL portion. The example below better 4183 illustrates this concept. 4184 4185 For a square global matrix we define each processor's diagonal portion 4186 to be its local rows and the corresponding columns (a square submatrix); 4187 each processor's off-diagonal portion encompasses the remainder of the 4188 local matrix (a rectangular submatrix). 4189 4190 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4191 4192 When calling this routine with a single process communicator, a matrix of 4193 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4194 type of communicator, use the construction mechanism: 4195 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4196 4197 By default, this format uses inodes (identical nodes) when possible. 4198 We search for consecutive rows with the same nonzero structure, thereby 4199 reusing matrix information to achieve increased efficiency. 4200 4201 Options Database Keys: 4202 + -mat_no_inode - Do not use inodes 4203 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4204 - -mat_aij_oneindex - Internally use indexing starting at 1 4205 rather than 0. Note that when calling MatSetValues(), 4206 the user still MUST index entries starting at 0! 4207 4208 4209 Example usage: 4210 4211 Consider the following 8x8 matrix with 34 non-zero values, that is 4212 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows, 4213 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4214 as follows: 4215 4216 .vb 4217 1 2 0 | 0 3 0 | 0 4 4218 Proc0 0 5 6 | 7 0 0 | 8 0 4219 9 0 10 | 11 0 0 | 12 0 4220 ------------------------------------- 4221 13 0 14 | 15 16 17 | 0 0 4222 Proc1 0 18 0 | 19 20 21 | 0 0 4223 0 0 0 | 22 23 0 | 24 0 4224 ------------------------------------- 4225 Proc2 25 26 27 | 0 0 28 | 29 0 4226 30 0 0 | 31 32 33 | 0 34 4227 .ve 4228 4229 This can be represented as a collection of submatrices as: 4230 4231 .vb 4232 A B C 4233 D E F 4234 G H I 4235 .ve 4236 4237 where the submatrices A,B,C are owned by proc0, D,E,F are 4238 owned by proc1, and G,H,I are owned by proc2. 4239 4240 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4241 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4242 The 'M','N' parameters are 8,8, and have the same values on all procs. 4243 4244 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4245 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4246 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4247 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4248 part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ 4249 matrix and [DF] as another SeqAIJ matrix. 4250 4251 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4252 allocated for every row of the local diagonal submatrix, and o_nz 4253 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4254 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4255 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4256 In this case, the values of d_nz,o_nz are: 4257 .vb 4258 proc0 : d_nz = 2, o_nz = 2 4259 proc1 : d_nz = 3, o_nz = 2 4260 proc2 : d_nz = 1, o_nz = 4 4261 .ve 4262 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4263 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4264 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4265 34 values. 4266 4267 When the d_nnz, o_nnz parameters are specified, the storage is specified 4268 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4269 In the above case the values for d_nnz,o_nnz are: 4270 .vb 4271 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4272 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4273 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4274 .ve 4275 Here the space allocated is the sum of all the above values, i.e. 34, and 4276 hence the preallocation is exact.
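   As a sketch (not a complete program; the error-code declaration and cleanup are omitted,
   and a three-process communicator is assumed), proc0 of the example above could create
   this matrix either directly or via the recommended MatCreate() construction mechanism:

.vb
   PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 values from the table above */
   Mat      A;

   /* direct creation */
   ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);

   /* or the recommended construction mechanism */
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   The other processes make the same collective calls with their own local sizes and
   d_nnz/o_nnz arrays.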
4277 4278 Level: intermediate 4279 4280 .keywords: matrix, aij, compressed row, sparse, parallel 4281 4282 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4283 MPIAIJ, MatCreateMPIAIJWithArrays() 4284 @*/ 4285 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4286 { 4287 PetscErrorCode ierr; 4288 PetscMPIInt size; 4289 4290 PetscFunctionBegin; 4291 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4292 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4293 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4294 if (size > 1) { 4295 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4296 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4297 } else { 4298 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4299 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4300 } 4301 PetscFunctionReturn(0); 4302 } 4303 4304 #undef __FUNCT__ 4305 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4306 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4307 { 4308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4309 4310 PetscFunctionBegin; 4311 *Ad = a->A; 4312 *Ao = a->B; 4313 *colmap = a->garray; 4314 PetscFunctionReturn(0); 4315 } 4316 4317 #undef __FUNCT__ 4318 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4319 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4320 { 4321 PetscErrorCode ierr; 4322 PetscInt i; 4323 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4324 4325 PetscFunctionBegin; 4326 if (coloring->ctype == IS_COLORING_GLOBAL) { 4327 ISColoringValue *allcolors,*colors; 4328 ISColoring ocoloring; 4329 4330 /* set coloring for diagonal portion */ 4331 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4332 4333 /* set coloring for off-diagonal portion */ 4334 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4335 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4336 for (i=0; i<a->B->cmap->n; i++) { 4337 colors[i] = allcolors[a->garray[i]]; 4338 } 4339 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4340 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4341 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4342 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4343 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4344 ISColoringValue *colors; 4345 PetscInt *larray; 4346 ISColoring ocoloring; 4347 4348 /* set coloring for diagonal portion */ 4349 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4350 for (i=0; i<a->A->cmap->n; i++) { 4351 larray[i] = i + A->cmap->rstart; 4352 } 4353 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4354 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4355 for (i=0; i<a->A->cmap->n; i++) { 4356 colors[i] = coloring->colors[larray[i]]; 4357 } 4358 ierr = PetscFree(larray);CHKERRQ(ierr); 4359 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4360 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4361 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4362 4363 /* set coloring for off-diagonal portion */ 4364 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4365 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4366 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4367 for (i=0; i<a->B->cmap->n; i++) { 4368 colors[i] = coloring->colors[larray[i]]; 4369 } 4370 ierr = PetscFree(larray);CHKERRQ(ierr); 4371 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4372 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4373 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4374 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4375 PetscFunctionReturn(0); 4376 } 4377 4378 #undef __FUNCT__ 4379 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4380 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4381 { 4382 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4383 PetscErrorCode ierr; 4384 4385 PetscFunctionBegin; 4386 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4387 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4388 PetscFunctionReturn(0); 4389 } 4390 4391 #undef __FUNCT__ 4392 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4393 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4394 { 4395 PetscErrorCode ierr; 4396 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4397 PetscInt *indx; 4398 4399 PetscFunctionBegin; 4400 /* This routine will ONLY return MPIAIJ type matrix */ 4401 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4402 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4403 if (n == PETSC_DECIDE) { 4404 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4405 } 4406 /* Check sum(n) = N */ 4407 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4408 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4409 4410 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4411 rstart -= m; 4412 4413 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4414 for (i=0; i<m; i++) { 4415 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4416 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4417 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4418 } 4419 4420 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4421 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4422 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4423 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4424 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4425 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4426 PetscFunctionReturn(0); 4427 } 4428 4429 #undef __FUNCT__ 4430 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4431 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4432 { 4433 PetscErrorCode ierr; 4434 PetscInt m,N,i,rstart,nnz,Ii; 4435 PetscInt *indx; 4436 PetscScalar *values; 4437 4438 PetscFunctionBegin; 4439 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4440 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4441 for (i=0; i<m; i++) { 4442 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4443 Ii = i + rstart; 4444 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4445 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4446 } 4447 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4448 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4449 PetscFunctionReturn(0); 4450 } 4451 4452 #undef __FUNCT__ 4453 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4454 /*@ 4455 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4456 matrices from each processor 4457 4458 Collective on MPI_Comm 4459 4460 Input Parameters: 4461 + comm - the communicator the parallel matrix will live on 4462 . inmat - the input sequential matrix on each process 4463 . n - number of local columns (or PETSC_DECIDE) 4464 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4465 4466 Output Parameter: 4467 . outmat - the parallel matrix generated 4468 4469 Level: advanced 4470 4471 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4472 4473 @*/ 4474 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4475 { 4476 PetscErrorCode ierr; 4477 4478 PetscFunctionBegin; 4479 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4480 if (scall == MAT_INITIAL_MATRIX) { 4481 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4482 } 4483 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4484 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4485 PetscFunctionReturn(0); 4486 } 4487 4488 #undef __FUNCT__ 4489 #define __FUNCT__ "MatFileSplit" 4490 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4491 { 4492 PetscErrorCode ierr; 4493 PetscMPIInt rank; 4494 PetscInt m,N,i,rstart,nnz; 4495 size_t len; 4496 const PetscInt *indx; 4497 PetscViewer out; 4498 char *name; 4499 Mat B; 4500 const PetscScalar *values; 4501 4502 PetscFunctionBegin; 4503 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4504 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4505 /* Should this be the type of the diagonal block of A?
*/ 4506 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4507 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4508 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4509 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4510 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4511 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4512 for (i=0; i<m; i++) { 4513 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4514 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4515 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4516 } 4517 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4519 4520 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4521 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4522 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4523 sprintf(name,"%s.%d",outfile,rank); 4524 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4525 ierr = PetscFree(name);CHKERRQ(ierr); 4526 ierr = MatView(B,out);CHKERRQ(ierr); 4527 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4528 ierr = MatDestroy(&B);CHKERRQ(ierr); 4529 PetscFunctionReturn(0); 4530 } 4531 4532 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4533 #undef __FUNCT__ 4534 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4535 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4536 { 4537 PetscErrorCode ierr; 4538 Mat_Merge_SeqsToMPI *merge; 4539 PetscContainer container; 4540 4541 PetscFunctionBegin; 4542 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4543 if (container) { 4544 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4545 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4557 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4558 ierr = PetscFree(merge);CHKERRQ(ierr); 4559 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4560 } 4561 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4562 PetscFunctionReturn(0); 4563 } 4564 4565 #include <../src/mat/utils/freespace.h> 4566 #include <petscbt.h> 4567 4568 #undef __FUNCT__ 4569 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4570 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4571 { 4572 PetscErrorCode ierr; 4573 MPI_Comm comm; 4574 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4575 PetscMPIInt size,rank,taga,*len_s; 4576 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4577 PetscInt proc,m; 4578 PetscInt **buf_ri,**buf_rj; 4579 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4580 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4581 MPI_Request *s_waits,*r_waits; 4582 MPI_Status *status; 4583 MatScalar *aa=a->a; 4584 MatScalar **abuf_r,*ba_i; 4585 Mat_Merge_SeqsToMPI *merge; 4586 PetscContainer container; 4587 4588 PetscFunctionBegin; 4589 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4590 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4591 4592 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4593 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4594 4595 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4596 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4597 4598 bi = merge->bi; 4599 bj = merge->bj; 4600 buf_ri = merge->buf_ri; 4601 buf_rj = merge->buf_rj; 4602 4603 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4604 owners = merge->rowmap->range; 4605 len_s = merge->len_s; 4606 4607 /* send and recv matrix values */ 4608 /*-----------------------------*/ 4609 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4610 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4611 4612 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4613 for (proc=0,k=0; proc<size; proc++) { 4614 if (!len_s[proc]) continue; 4615 i = owners[proc]; 4616 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4617 k++; 4618 } 4619 4620 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4621 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4622 ierr = PetscFree(status);CHKERRQ(ierr); 4623 4624 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4625 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4626 4627 /* insert mat values of mpimat */ 4628 /*----------------------------*/ 4629 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4630 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4631 4632 for (k=0; k<merge->nrecv; k++) { 4633 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4634 nrows = *(buf_ri_k[k]); 4635 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4636 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4637 } 4638 4639 /* set values of ba */ 4640 m = merge->rowmap->n; 4641 for (i=0; i<m; i++) { 4642 arow = owners[rank] + i; 4643 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4644 bnzi = bi[i+1] - bi[i]; 4645 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4646 4647 /* add local non-zero vals of this proc's seqmat into ba */ 4648 anzi = ai[arow+1] - ai[arow]; 4649 aj = a->j + ai[arow]; 4650 aa = a->a + ai[arow]; 4651 nextaj = 0; 4652 for (j=0; nextaj<anzi; j++) { 4653 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4654 ba_i[j] += aa[nextaj++]; 4655 } 4656 } 4657 4658 /* add received vals into ba */ 4659 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4660 /* i-th row */ 4661 if (i == *nextrow[k]) { 4662 anzi = *(nextai[k]+1) - *nextai[k]; 4663 aj = buf_rj[k] + *(nextai[k]); 4664 aa = abuf_r[k] + *(nextai[k]); 4665 nextaj = 0; 4666 for (j=0; nextaj<anzi; j++) { 4667 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4668 ba_i[j] += aa[nextaj++]; 4669 } 4670 } 4671 nextrow[k]++; nextai[k]++; 4672 } 4673 } 4674 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4675 } 4676 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4677 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4678 4679 ierr =
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4680 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4681 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4682 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4683 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4684 PetscFunctionReturn(0); 4685 } 4686 4687 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4688 4689 #undef __FUNCT__ 4690 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4691 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4692 { 4693 PetscErrorCode ierr; 4694 Mat B_mpi; 4695 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4696 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4697 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4698 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4699 PetscInt len,proc,*dnz,*onz,bs,cbs; 4700 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4701 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4702 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4703 MPI_Status *status; 4704 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4705 PetscBT lnkbt; 4706 Mat_Merge_SeqsToMPI *merge; 4707 PetscContainer container; 4708 4709 PetscFunctionBegin; 4710 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4711 4712 /* make sure it is a PETSc comm */ 4713 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4714 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4715 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4716 4717 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4718 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4719 4720 /* determine row ownership */ 4721 /*---------------------------------------------------------*/ 4722 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4723 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4724 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4725 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4726 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4727 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4728 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4729 4730 m = merge->rowmap->n; 4731 owners = merge->rowmap->range; 4732 4733 /* determine the number of messages to send, their lengths */ 4734 /*---------------------------------------------------------*/ 4735 len_s = merge->len_s; 4736 4737 len = 0; /* length of buf_si[] */ 4738 merge->nsend = 0; 4739 for (proc=0; proc<size; proc++) { 4740 len_si[proc] = 0; 4741 if (proc == rank) { 4742 len_s[proc] = 0; 4743 } else { 4744 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4745 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4746 } 4747 if (len_s[proc]) { 4748 merge->nsend++; 4749 nrows = 0; 4750 for (i=owners[proc]; i<owners[proc+1]; i++) { 4751 if (ai[i+1] > ai[i]) nrows++; 4752 } 4753 len_si[proc] = 2*(nrows+1); 4754 len += len_si[proc]; 4755 } 4756 } 4757 4758 /* determine the number and length of messages to receive for ij-structure */ 4759 /*-------------------------------------------------------------------------*/ 4760 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4761 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4762 4763 /* post the Irecv of j-structure */ 4764 
/*-------------------------------*/ 4765 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4766 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4767 4768 /* post the Isend of j-structure */ 4769 /*--------------------------------*/ 4770 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4771 4772 for (proc=0, k=0; proc<size; proc++) { 4773 if (!len_s[proc]) continue; 4774 i = owners[proc]; 4775 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4776 k++; 4777 } 4778 4779 /* receives and sends of j-structure are complete */ 4780 /*------------------------------------------------*/ 4781 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4782 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4783 4784 /* send and recv i-structure */ 4785 /*---------------------------*/ 4786 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4787 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4788 4789 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4790 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4791 for (proc=0,k=0; proc<size; proc++) { 4792 if (!len_s[proc]) continue; 4793 /* form outgoing message for i-structure: 4794 buf_si[0]: nrows to be sent 4795 [1:nrows]: row index (global) 4796 [nrows+1:2*nrows+1]: i-structure index 4797 */ 4798 /*-------------------------------------------*/ 4799 nrows = len_si[proc]/2 - 1; 4800 buf_si_i = buf_si + nrows+1; 4801 buf_si[0] = nrows; 4802 buf_si_i[0] = 0; 4803 nrows = 0; 4804 for (i=owners[proc]; i<owners[proc+1]; i++) { 4805 anzi = ai[i+1] - ai[i]; 4806 if (anzi) { 4807 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4808 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4809 nrows++; 4810 } 4811 } 4812 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4813 k++; 4814 buf_si += len_si[proc]; 4815 } 4816 4817 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4818 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4819 4820 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4821 for (i=0; i<merge->nrecv; i++) { 4822 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4823 } 4824 4825 ierr = PetscFree(len_si);CHKERRQ(ierr); 4826 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4827 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4828 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4829 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4830 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4831 ierr = PetscFree(status);CHKERRQ(ierr); 4832 4833 /* compute a local seq matrix in each processor */ 4834 /*----------------------------------------------*/ 4835 /* allocate bi array and free space for accumulating nonzero column info */ 4836 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4837 bi[0] = 0; 4838 4839 /* create and initialize a linked list */ 4840 nlnk = N+1; 4841 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4842 4843 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4844 len = ai[owners[rank+1]] - ai[owners[rank]]; 4845 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4846 4847 
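  /* current_space tracks the active chunk of the free-space list: each merged row of column
     indices is copied into it below, and PetscFreeSpaceGet() supplies a larger chunk whenever
     the remaining room in the current chunk is smaller than the row being added */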
current_space = free_space; 4848 4849 /* determine symbolic info for each local row */ 4850 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4851 4852 for (k=0; k<merge->nrecv; k++) { 4853 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4854 nrows = *buf_ri_k[k]; 4855 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4856 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4857 } 4858 4859 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4860 len = 0; 4861 for (i=0; i<m; i++) { 4862 bnzi = 0; 4863 /* add local non-zero cols of this proc's seqmat into lnk */ 4864 arow = owners[rank] + i; 4865 anzi = ai[arow+1] - ai[arow]; 4866 aj = a->j + ai[arow]; 4867 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4868 bnzi += nlnk; 4869 /* add received col data into lnk */ 4870 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4871 if (i == *nextrow[k]) { /* i-th row */ 4872 anzi = *(nextai[k]+1) - *nextai[k]; 4873 aj = buf_rj[k] + *nextai[k]; 4874 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4875 bnzi += nlnk; 4876 nextrow[k]++; nextai[k]++; 4877 } 4878 } 4879 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4880 4881 /* if free space is not available, make more free space */ 4882 if (current_space->local_remaining<bnzi) { 4883 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4884 nspacedouble++; 4885 } 4886 /* copy data into free space, then initialize lnk */ 4887 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4888 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4889 4890 current_space->array += bnzi; 4891 current_space->local_used += bnzi; 4892 current_space->local_remaining -= bnzi; 4893 4894 bi[i+1] = bi[i] + bnzi; 4895 } 4896 4897 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4898 4899 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4900 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4901 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4902 4903 /* create symbolic parallel matrix B_mpi */ 4904 /*---------------------------------------*/ 4905 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4906 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4907 if (n==PETSC_DECIDE) { 4908 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4909 } else { 4910 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4911 } 4912 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4913 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4914 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4915 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4916 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4917 4918 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4919 B_mpi->assembled = PETSC_FALSE; 4920 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4921 merge->bi = bi; 4922 merge->bj = bj; 4923 merge->buf_ri = buf_ri; 4924 merge->buf_rj = buf_rj; 4925 merge->coi = NULL; 4926 merge->coj = NULL; 4927 merge->owners_co = NULL; 4928 4929 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4930 4931 /* attach the supporting struct to B_mpi for reuse */ 4932 ierr =
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4933 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4934 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4935 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4936 *mpimat = B_mpi; 4937 4938 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4939 PetscFunctionReturn(0); 4940 } 4941 4942 #undef __FUNCT__ 4943 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4944 /*@C 4945 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4946 matrices from each processor 4947 4948 Collective on MPI_Comm 4949 4950 Input Parameters: 4951 + comm - the communicator the parallel matrix will live on 4952 . seqmat - the input sequential matrix on each process 4953 . m - number of local rows (or PETSC_DECIDE) 4954 . n - number of local columns (or PETSC_DECIDE) 4955 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4956 4957 Output Parameter: 4958 . mpimat - the parallel matrix generated 4959 4960 Level: advanced 4961 4962 Notes: 4963 The dimensions of the sequential matrix in each processor MUST be the same. 4964 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4965 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4966 @*/ 4967 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4968 { 4969 PetscErrorCode ierr; 4970 PetscMPIInt size; 4971 4972 PetscFunctionBegin; 4973 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4974 if (size == 1) { 4975 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4976 if (scall == MAT_INITIAL_MATRIX) { 4977 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4978 } else { 4979 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4980 } 4981 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4982 PetscFunctionReturn(0); 4983 } 4984 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4985 if (scall == MAT_INITIAL_MATRIX) { 4986 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4987 } 4988 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4989 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4990 PetscFunctionReturn(0); 4991 } 4992 4993 #undef __FUNCT__ 4994 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4995 /*@ 4996 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4997 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4998 with MatGetSize() 4999 5000 Not Collective 5001 5002 Input Parameters: 5003 + A - the matrix 5004 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5005 5006 Output Parameter: 5007 .
A_loc - the local sequential matrix generated 5008 5009 Level: developer 5010 5011 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5012 5013 @*/ 5014 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5015 { 5016 PetscErrorCode ierr; 5017 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5018 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5019 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5020 MatScalar *aa=a->a,*ba=b->a,*cam; 5021 PetscScalar *ca; 5022 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5023 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5024 PetscBool match; 5025 5026 PetscFunctionBegin; 5027 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5028 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5029 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5030 if (scall == MAT_INITIAL_MATRIX) { 5031 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5032 ci[0] = 0; 5033 for (i=0; i<am; i++) { 5034 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5035 } 5036 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5037 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5038 k = 0; 5039 for (i=0; i<am; i++) { 5040 ncols_o = bi[i+1] - bi[i]; 5041 ncols_d = ai[i+1] - ai[i]; 5042 /* off-diagonal portion of A */ 5043 for (jo=0; jo<ncols_o; jo++) { 5044 col = cmap[*bj]; 5045 if (col >= cstart) break; 5046 cj[k] = col; bj++; 5047 ca[k++] = *ba++; 5048 } 5049 /* diagonal portion of A */ 5050 for (j=0; j<ncols_d; j++) { 5051 cj[k] = cstart + *aj++; 5052 ca[k++] = *aa++; 5053 } 5054 /* off-diagonal portion of A */ 5055 for (j=jo; j<ncols_o; j++) { 5056 cj[k] = cmap[*bj++]; 5057 ca[k++] = *ba++; 5058 } 5059 } 5060 /* put together the new matrix */ 5061 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5062 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5063 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5064 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5065 mat->free_a = PETSC_TRUE; 5066 mat->free_ij = PETSC_TRUE; 5067 mat->nonew = 0; 5068 } else if (scall == MAT_REUSE_MATRIX) { 5069 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5070 ci = mat->i; cj = mat->j; cam = mat->a; 5071 for (i=0; i<am; i++) { 5072 /* off-diagonal portion of A */ 5073 ncols_o = bi[i+1] - bi[i]; 5074 for (jo=0; jo<ncols_o; jo++) { 5075 col = cmap[*bj]; 5076 if (col >= cstart) break; 5077 *cam++ = *ba++; bj++; 5078 } 5079 /* diagonal portion of A */ 5080 ncols_d = ai[i+1] - ai[i]; 5081 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5082 /* off-diagonal portion of A */ 5083 for (j=jo; j<ncols_o; j++) { 5084 *cam++ = *ba++; bj++; 5085 } 5086 } 5087 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5088 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5089 PetscFunctionReturn(0); 5090 } 5091 5092 #undef __FUNCT__ 5093 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5094 /*@C 5095 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5096 5097 Not Collective 5098 5099 Input Parameters: 5100 + A - the matrix 5101 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5102 - row, col - index sets of rows and columns to extract (or NULL) 5103 5104 Output Parameter: 5105 .
A_loc - the local sequential matrix generated 5106 5107 Level: developer 5108 5109 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5110 5111 @*/ 5112 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5113 { 5114 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5115 PetscErrorCode ierr; 5116 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5117 IS isrowa,iscola; 5118 Mat *aloc; 5119 PetscBool match; 5120 5121 PetscFunctionBegin; 5122 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5123 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5124 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5125 if (!row) { 5126 start = A->rmap->rstart; end = A->rmap->rend; 5127 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5128 } else { 5129 isrowa = *row; 5130 } 5131 if (!col) { 5132 start = A->cmap->rstart; 5133 cmap = a->garray; 5134 nzA = a->A->cmap->n; 5135 nzB = a->B->cmap->n; 5136 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5137 ncols = 0; 5138 for (i=0; i<nzB; i++) { 5139 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5140 else break; 5141 } 5142 imark = i; 5143 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5144 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5145 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5146 } else { 5147 iscola = *col; 5148 } 5149 if (scall != MAT_INITIAL_MATRIX) { 5150 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5151 aloc[0] = *A_loc; 5152 } 5153 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5154 *A_loc = aloc[0]; 5155 ierr = PetscFree(aloc);CHKERRQ(ierr); 5156 if (!row) { 5157 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5158 } 5159 if (!col) { 5160 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5161 } 5162 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5163 PetscFunctionReturn(0); 5164 } 5165 5166 #undef __FUNCT__ 5167 #define __FUNCT__ "MatGetBrowsOfAcols" 5168 /*@C 5169 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5170 5171 Collective on Mat 5172 5173 Input Parameters: 5174 + A,B - the matrices in mpiaij format 5175 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5176 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5177 5178 Output Parameter: 5179 + rowb, colb - index sets of rows and columns of B to extract 5180 - B_seq - the sequential matrix generated 5181 5182 Level: developer 5183 5184 @*/ 5185 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5186 { 5187 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5188 PetscErrorCode ierr; 5189 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5190 IS isrowb,iscolb; 5191 Mat *bseq=NULL; 5192 5193 PetscFunctionBegin; 5194 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5195 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5196 } 5197 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5198 5199 if (scall == MAT_INITIAL_MATRIX) { 5200 start = A->cmap->rstart; 5201 cmap = a->garray; 5202 nzA = a->A->cmap->n; 5203 nzB = a->B->cmap->n; 5204 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5205 ncols = 0; 5206 for (i=0; i<nzB; i++) { /* row < local row index */ 5207 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5208 else break; 5209 } 5210 imark = i; 5211 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5212 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5213 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5214 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5215 } else { 5216 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5217 isrowb = *rowb; iscolb = *colb; 5218 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5219 bseq[0] = *B_seq; 5220 } 5221 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5222 *B_seq = bseq[0]; 5223 ierr = PetscFree(bseq);CHKERRQ(ierr); 5224 if (!rowb) { 5225 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5226 } else { 5227 *rowb = isrowb; 5228 } 5229 if (!colb) { 5230 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5231 } else { 5232 *colb = iscolb; 5233 } 5234 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5235 PetscFunctionReturn(0); 5236 } 5237 5238 #undef __FUNCT__ 5239 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5240 /* 5241 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5242 of the OFF-DIAGONAL portion of local A 5243 5244 Collective on Mat 5245 5246 Input Parameters: 5247 + A,B - the matrices in mpiaij format 5248 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5249 5250 Output Parameter: 5251 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5252 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5253 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5254 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5255 5256 Level: developer 5257 5258 */ 5259 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5260 { 5261 VecScatter_MPI_General *gen_to,*gen_from; 5262 PetscErrorCode ierr; 5263 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5264 Mat_SeqAIJ *b_oth; 5265 VecScatter ctx =a->Mvctx; 5266 MPI_Comm comm; 5267 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5268 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5269 PetscScalar *rvalues,*svalues; 5270 MatScalar *b_otha,*bufa,*bufA; 5271 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5272 MPI_Request *rwaits = NULL,*swaits = NULL; 5273 MPI_Status *sstatus,rstatus; 5274 PetscMPIInt jj; 5275 PetscInt *cols,sbs,rbs; 5276 PetscScalar *vals; 5277 5278 PetscFunctionBegin; 5279 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5280 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5281 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5282 } 5283 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5284 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5285 5286 gen_to = (VecScatter_MPI_General*)ctx->todata; 5287 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5288 rvalues = gen_from->values; /* holds the length of receiving row */ 5289 svalues = gen_to->values; /* holds the length of sending row */ 5290 nrecvs = gen_from->n; 5291 nsends = gen_to->n; 5292 5293 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5294 srow = gen_to->indices; /* local row index to be sent */ 5295 sstarts = gen_to->starts; 5296 sprocs = gen_to->procs; 5297 sstatus = gen_to->sstatus; 5298 sbs = gen_to->bs; 5299 rstarts = gen_from->starts; 5300 rprocs = gen_from->procs; 5301 rbs = gen_from->bs; 5302 5303 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5304 if (scall == MAT_INITIAL_MATRIX) { 5305 /* i-array */ 5306 /*---------*/ 5307 /* post receives */ 5308 for (i=0; i<nrecvs; i++) { 5309 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5310 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5311 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5312 } 5313 5314 /* pack the outgoing message */ 5315 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5316 5317 sstartsj[0] = 0; 5318 rstartsj[0] = 0; 5319 len = 0; /* total length of j or a array to be sent */ 5320 k = 0; 5321 for (i=0; i<nsends; i++) { 5322 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5323 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5324 for (j=0; j<nrows; j++) { 5325 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5326 for (l=0; l<sbs; l++) { 5327 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5328 5329 rowlen[j*sbs+l] = ncols; 5330 5331 len += ncols; 5332 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5333 } 5334 k++; 5335 } 5336 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5337 5338 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5339 } 5340 /* recvs and sends of i-array are completed */ 5341 i = nrecvs; 5342 while (i--) { 5343 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5344 } 5345 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5346 5347 /* allocate buffers for sending j and a arrays */ 5348 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5349 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5350 5351 /* create i-array of B_oth */ 5352 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5353 5354 b_othi[0] = 0; 5355 len = 0; /* total length of j or a array to be received */ 5356 k = 0; 5357 for (i=0; i<nrecvs; i++) { 5358 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5359 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5360 for (j=0; j<nrows; j++) { 5361 b_othi[k+1] = b_othi[k] + rowlen[j]; 5362 len += rowlen[j]; k++; 5363 } 5364 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5365 } 5366 5367 /* allocate space for j and a arrays of B_oth */ 5368 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5369 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5370 5371 /* j-array */ 5372 /*---------*/ 5373 /* post receives of j-array */ 5374 for (i=0; i<nrecvs; i++) { 5375 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5376 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5377 } 5378 5379 /* pack the outgoing message j-array */ 5380 k = 0; 5381 for (i=0; i<nsends; i++) { 5382 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5383 bufJ = bufj+sstartsj[i]; 5384 for (j=0; j<nrows; j++) { 5385 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5386 for (ll=0; ll<sbs; ll++) { 5387 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5388 for (l=0; l<ncols; l++) { 5389 *bufJ++ = cols[l]; 5390 } 5391 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5392 } 5393 } 5394 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5395 } 5396 5397 /* recvs and sends of j-array are completed */ 5398 i = nrecvs; 5399 while (i--) { 5400 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5401 } 5402 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5403 } else if (scall == MAT_REUSE_MATRIX) { 5404 sstartsj = *startsj_s; 5405 rstartsj = *startsj_r; 5406 bufa = *bufa_ptr; 5407 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5408 b_otha = b_oth->a; 5409 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5410 5411 /* a-array */ 5412 /*---------*/ 5413 /* post receives of a-array */ 5414 for (i=0; i<nrecvs; i++) { 5415 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5416 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5417 } 5418 5419 /* pack the outgoing message a-array */ 5420 k = 0; 5421 for (i=0; i<nsends; i++) { 5422 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5423 bufA = bufa+sstartsj[i]; 5424 for (j=0; j<nrows; j++) { 5425 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5426 for (ll=0; ll<sbs; ll++) { 5427 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5428 for (l=0; l<ncols; l++) { 5429 *bufA++ = vals[l]; 5430 } 5431 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5432 } 5433 } 5434 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5435 } 5436 /* recvs and sends of a-array are completed */ 5437 i = nrecvs; 5438 while (i--) { 5439 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5440 } 5441 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5442 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5443 5444 if (scall == MAT_INITIAL_MATRIX) { 5445 /* put together the new matrix */ 5446 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5447 5448 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5449 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5450 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5451 b_oth->free_a = PETSC_TRUE; 5452 b_oth->free_ij = PETSC_TRUE; 5453 b_oth->nonew = 0; 5454 5455 ierr = PetscFree(bufj);CHKERRQ(ierr); 5456 if (!startsj_s || !bufa_ptr) { 5457 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5458 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5459 } else { 5460 *startsj_s = sstartsj; 5461 *startsj_r = rstartsj; 5462 *bufa_ptr = bufa; 5463 } 5464 } 5465 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5466 PetscFunctionReturn(0); 5467 } 5468 5469 #undef __FUNCT__ 5470 #define __FUNCT__ "MatGetCommunicationStructs" 5471 /*@C 5472 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5473 5474 Not Collective 5475 5476 Input Parameters: 5477 . A - The matrix in mpiaij format 5478 5479 Output Parameter: 5480 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5481 . 
colmap - A map from global column index to local index into lvec 5482 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5483 5484 Level: developer 5485 5486 @*/ 5487 #if defined(PETSC_USE_CTABLE) 5488 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5489 #else 5490 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5491 #endif 5492 { 5493 Mat_MPIAIJ *a; 5494 5495 PetscFunctionBegin; 5496 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5497 PetscValidPointer(lvec, 2); 5498 PetscValidPointer(colmap, 3); 5499 PetscValidPointer(multScatter, 4); 5500 a = (Mat_MPIAIJ*) A->data; 5501 if (lvec) *lvec = a->lvec; 5502 if (colmap) *colmap = a->colmap; 5503 if (multScatter) *multScatter = a->Mvctx; 5504 PetscFunctionReturn(0); 5505 } 5506 5507 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5508 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5509 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5510 5511 #undef __FUNCT__ 5512 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5513 /* 5514 Computes (B'*A')' since computing B*A directly is untenable 5515 5516 n p p 5517 ( ) ( ) ( ) 5518 m ( A ) * n ( B ) = m ( C ) 5519 ( ) ( ) ( ) 5520 5521 */ 5522 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5523 { 5524 PetscErrorCode ierr; 5525 Mat At,Bt,Ct; 5526 5527 PetscFunctionBegin; 5528 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5529 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5530 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5531 ierr = MatDestroy(&At);CHKERRQ(ierr); 5532 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5533 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5534 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5535 PetscFunctionReturn(0); 5536 } 5537 5538 #undef __FUNCT__ 5539 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5540 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5541 { 5542 PetscErrorCode ierr; 5543 PetscInt m=A->rmap->n,n=B->cmap->n; 5544 Mat Cmat; 5545 5546 PetscFunctionBegin; 5547 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5548 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5549 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5550 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5551 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5552 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5553 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5554 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5555 5556 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5557 5558 *C = Cmat; 5559 PetscFunctionReturn(0); 5560 } 5561 5562 /* ----------------------------------------------------------------*/ 5563 #undef __FUNCT__ 5564 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5565 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5566 { 5567 PetscErrorCode ierr; 5568 5569 PetscFunctionBegin; 5570 if (scall == MAT_INITIAL_MATRIX) { 5571 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5572 } 5573 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5574 
PetscFunctionReturn(0); 5575 } 5576 5577 #if defined(PETSC_HAVE_MUMPS) 5578 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5579 #endif 5580 #if defined(PETSC_HAVE_PASTIX) 5581 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5582 #endif 5583 #if defined(PETSC_HAVE_SUPERLU_DIST) 5584 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5585 #endif 5586 #if defined(PETSC_HAVE_CLIQUE) 5587 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5588 #endif 5589 5590 /*MC 5591 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5592 5593 Options Database Keys: 5594 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5595 5596 Level: beginner 5597 5598 .seealso: MatCreateAIJ() 5599 M*/ 5600 5601 #undef __FUNCT__ 5602 #define __FUNCT__ "MatCreate_MPIAIJ" 5603 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5604 { 5605 Mat_MPIAIJ *b; 5606 PetscErrorCode ierr; 5607 PetscMPIInt size; 5608 5609 PetscFunctionBegin; 5610 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5611 5612 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5613 B->data = (void*)b; 5614 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5615 B->assembled = PETSC_FALSE; 5616 B->insertmode = NOT_SET_VALUES; 5617 b->size = size; 5618 5619 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5620 5621 /* build cache for off array entries formed */ 5622 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5623 5624 b->donotstash = PETSC_FALSE; 5625 b->colmap = 0; 5626 b->garray = 0; 5627 b->roworiented = PETSC_TRUE; 5628 5629 /* stuff used for matrix vector multiply */ 5630 b->lvec = NULL; 5631 b->Mvctx = NULL; 5632 5633 /* stuff for MatGetRow() */ 5634 b->rowindices = 0; 5635 b->rowvalues = 0; 5636 b->getrowactive = PETSC_FALSE; 5637 5638 /* flexible pointer used in CUSP/CUSPARSE classes */ 5639 b->spptr = NULL; 5640 5641 #if defined(PETSC_HAVE_MUMPS) 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_aij_mumps",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5643 #endif 5644 #if defined(PETSC_HAVE_PASTIX) 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C","MatGetFactor_mpiaij_pastix",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5646 #endif 5647 #if defined(PETSC_HAVE_SUPERLU_DIST) 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C","MatGetFactor_mpiaij_superlu_dist",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5649 #endif 5650 #if defined(PETSC_HAVE_CLIQUE) 5651 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C","MatGetFactor_aij_clique",MatGetFactor_aij_clique);CHKERRQ(ierr); 5652 #endif 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIAIJ",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5654 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIAIJ",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5655 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIAIJ",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_MPIAIJ",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5657 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJ",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C","MatMPIAIJSetPreallocationCSR_MPIAIJ",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIAIJ",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C","MatConvert_MPIAIJ_MPIAIJPERM",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C","MatConvert_MPIAIJ_MPIAIJCRL",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C","MatConvert_MPIAIJ_MPISBAIJ",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C","MatMatMult_MPIDense_MPIAIJ",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C","MatMatMultSymbolic_MPIDense_MPIAIJ",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C","MatMatMultNumeric_MPIDense_MPIAIJ",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5666 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5667 PetscFunctionReturn(0); 5668 } 5669 5670 #undef __FUNCT__ 5671 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5672 /*@ 5673 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5674 and "off-diagonal" part of the matrix in CSR format. 5675 5676 Collective on MPI_Comm 5677 5678 Input Parameters: 5679 + comm - MPI communicator 5680 . m - number of local rows (Cannot be PETSC_DECIDE) 5681 . n - This value should be the same as the local size used in creating the 5682 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5683 calculated if N is given) For square matrices n is almost always m. 5684 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5685 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5686 . i - row indices for "diagonal" portion of matrix 5687 . j - column indices 5688 . a - matrix values 5689 . oi - row indices for "off-diagonal" portion of matrix 5690 . oj - column indices 5691 - oa - matrix values 5692 5693 Output Parameter: 5694 . mat - the matrix 5695 5696 Level: advanced 5697 5698 Notes: 5699 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5700 must free the arrays once the matrix has been destroyed and not before. 5701 5702 The i and j indices are 0 based 5703 5704 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5705 5706 This sets local rows and cannot be used to set off-processor values. 5707 5708 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5709 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5710 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5711 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5712 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5713 communication if it is known that only local entries will be set. 5714 5715 .keywords: matrix, aij, compressed row, sparse, parallel 5716 5717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5718 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5719 @*/ 5720 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5721 { 5722 PetscErrorCode ierr; 5723 Mat_MPIAIJ *maij; 5724 5725 PetscFunctionBegin; 5726 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5727 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5728 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5729 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5730 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5731 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5732 maij = (Mat_MPIAIJ*) (*mat)->data; 5733 5734 (*mat)->preallocated = PETSC_TRUE; 5735 5736 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5737 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5738 5739 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5740 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5741 5742 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5743 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5744 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5745 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5746 5747 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5748 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5749 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5750 PetscFunctionReturn(0); 5751 } 5752 5753 /* 5754 Special version for direct calls from Fortran 5755 */ 5756 #include <petsc-private/fortranimpl.h> 5757 5758 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5759 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5760 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5761 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5762 #endif 5763 5764 /* Change these macros so can be used in void function */ 5765 #undef CHKERRQ 5766 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5767 #undef SETERRQ2 5768 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5769 #undef SETERRQ3 5770 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5771 #undef SETERRQ 5772 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5773 5774 #undef __FUNCT__ 5775 #define __FUNCT__ "matsetvaluesmpiaij_" 5776 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5777 { 5778 Mat mat = *mmat; 5779 PetscInt m = *mm, n = *mn; 5780 InsertMode addv = *maddv; 5781 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5782 PetscScalar value; 5783 PetscErrorCode ierr; 

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar  *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   the value array ba must be refreshed from the new B before ap2 is recomputed,
                   otherwise ap2 would point into the storage of the destroyed matrix */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
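
/*
   Usage sketch (not part of PETSc): the man page for MatCreateMPIAIJWithSplitArrays() above recommends assembling
   with MatSetValues() plus MAT_IGNORE_OFF_PROC_ENTRIES instead of building split CSR arrays by hand. The routine
   below illustrates that pattern for a tridiagonal (1D Laplacian style) matrix. The name AssembleTridiagonalAIJ()
   and the stencil are illustrative assumptions only. It is written as standalone application code, assuming
   #include <petscmat.h> and an initialized PETSc, and is therefore kept out of compilation here with #if 0.
*/
#if 0
PetscErrorCode AssembleTridiagonalAIJ(MPI_Comm comm,PetscInt mlocal,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,N,row,ncols;
  PetscInt       cols[3];
  PetscScalar    vals[3];

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  /* Preallocate at most 3 nonzeros per row in the diagonal block and 1 in the off-diagonal block;
     only the preallocation matching the communicator size takes effect, so calling both is safe */
  ierr = MatMPIAIJSetPreallocation(*A,3,NULL,1,NULL);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(*A,3,NULL);CHKERRQ(ierr);
  /* Every entry set below lies in a locally owned row, so stash communication can be disabled */
  ierr = MatSetOption(*A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetSize(*A,NULL,&N);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ncols = 0;
    if (row > 0)   {cols[ncols] = row-1; vals[ncols] = -1.0; ncols++;}
    cols[ncols] = row; vals[ncols] = 2.0; ncols++;
    if (row < N-1) {cols[ncols] = row+1; vals[ncols] = -1.0; ncols++;}
    ierr = MatSetValues(*A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif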
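
/*
   Usage sketch for MatCreateMPIAIJWithSplitArrays() itself (not part of PETSc): two processes each own one row
   and one column of the global 2x2 matrix [2 -1; -1 2]. The diagonal block uses local column indices, while the
   off-diagonal block is assumed to take global column indices, consistent with maij->B being created above with
   the global column count; that assumption, and the name SplitArraysExample(), are illustrative only. Standalone
   application-style code, kept out of compilation with #if 0.
*/
#if 0
PetscErrorCode SplitArraysExample(MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  Mat            A;
  /* The arrays are NOT copied, so they must remain valid until the matrix is destroyed */
  PetscInt       i[2]  = {0,1},j[1],oi[2] = {0,1},oj[1];
  PetscScalar    a[1]  = {2.0},oa[1] = {-1.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (size != 2) PetscFunctionReturn(0);  /* this sketch is written for exactly two processes */
  j[0]  = 0;                              /* local column index within the 1x1 diagonal block */
  oj[0] = 1 - rank;                       /* column owned by the other process, in global numbering (assumed) */
  ierr  = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
  ierr  = MatView(A,PETSC_VIEWER_STDOUT_(comm));CHKERRQ(ierr);
  ierr  = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif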