#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
  automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
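/*
   Example usage (a sketch, not part of the library; the sizes and nonzero
   counts are illustrative): the creation sequence the manual pages above
   recommend.  Calling both preallocation routines keeps the code correct on
   any number of processes, since the routine that does not match the actual
   matrix type is ignored.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/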
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
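/*
   Example usage of the routine above (a sketch; error handling abbreviated).
   Note that the caller must supply an array with one entry per *global*
   column; for NORM_2 the squares are accumulated locally, summed across
   processes with MPI_Allreduce(), and the square root is taken only at the end.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/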
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
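/*
   Example usage of MatDistribute_MPIAIJ() above (a sketch; 'gmat' and 'm' are
   placeholders): a preconditioner holding a square SeqAIJ matrix gmat on
   process 0 can spread it over comm, giving this process m rows, and later
   refresh just the numerical values when gmat's nonzero structure is unchanged.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/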
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
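/*
   Worked example for the routine above (the numbers are made up): if this
   process's off-diagonal block B has garray = {3,7,12}, i.e. local columns
   0,1,2 hold global columns 3,7,12, then the non-CTABLE branch produces
   colmap[3] = 1, colmap[7] = 2, colmap[12] = 3 and 0 everywhere else.
   A lookup

     col = aij->colmap[gcol] - 1;

   therefore yields the local column number, with col < 0 meaning that global
   column gcol does not occur in B; the +1/-1 shift lets 0 mark absent columns.
*/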
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
b_noinsert: ; \
    bilen[row] = nrow2; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
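/*
   Usage note for the two routines above (a sketch; the variables are
   placeholders).  MatSetValues_MPIAIJ() inserts locally owned rows directly
   into the diagonal (A) or off-diagonal (B) block and stashes rows owned by
   other processes, so those values only arrive during assembly:

     ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatGetValues_MPIAIJ(), by contrast, supports only locally owned rows, as its
   error message states.
*/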
"MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. 
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
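/*
   Example usage of MatZeroRows() on an MPIAIJ matrix (a sketch; the row
   numbers are made up): rows may be listed by any process, since the
   implementation below routes each global row index to its owner.  Passing x
   and b also fixes the right-hand side so that b[row] = diag*x[row] for every
   zeroed row.

     PetscInt rows[] = {0, 17};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/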
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
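/*
   Example usage of MatZeroRowsColumns() (a sketch): unlike MatZeroRows(), the
   implementation below also clears the *columns*, which preserves symmetry.
   Before the columns are cleared, the eliminated unknowns are folded into the
   right-hand side, b <- b - A(:,rows)*x(rows), which is what the mask/scatter
   logic in the routine carries out for the off-diagonal block in parallel.

     ierr = MatZeroRowsColumns(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/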
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m  = aij->compressedrow.nrows;
    ii = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
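/*
   Note on the pattern used by MatMult_MPIAIJ() and MatMultAdd_MPIAIJ() above:
   with the input vector split into locally owned entries x and gathered ghost
   values lvec, the product is computed as

     y = (a->A)*x + (a->B)*lvec

   where a->A is the diagonal block and a->B the off-diagonal block.  The
   VecScatterBegin() is posted first, the purely local product is performed
   while the ghost values are in flight, and VecScatterEnd() plus the multadd
   on a->B complete the result, overlapping communication with computation.
*/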
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
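/*
   Example usage of the transpose test implemented above (a sketch; the
   tolerance is illustrative).  The cheap local test on the diagonal blocks is
   tried first; the submatrix comparison is only done when it passes:

     PetscBool f;
     ierr = MatIsTranspose(Amat,Bmat,1.e-10,&f);CHKERRQ(ierr);
*/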
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  PetscFunctionReturn(0);
}
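/*
   Layout written by MatView_MPIAIJ_Binary() above (a summary of the code, not
   a separate specification):

     header[4]       MAT_FILE_CLASSID, M, N, global nonzero count  (PETSC_INT)
     row lengths     one entry per row of the matrix               (PETSC_INT)
     column indices  global indices, row by row                    (PETSC_INT)
     values          nonzero values, row by row                    (PETSC_SCALAR)

   Each process streams its rows to process 0 under flow control, and within a
   row the off-diagonal entries are merged around the diagonal block so the
   column indices come out in ascending global order.
*/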
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1456 if (iascii) { 1457 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1458 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1459 MatInfo info; 1460 PetscBool inodes; 1461 1462 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1463 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1465 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1466 if (!inodes) { 1467 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1468 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1469 } else { 1470 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1471 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1472 } 1473 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1475 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1477 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1480 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1483 PetscInt inodecount,inodelimit,*inodes; 1484 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1485 if (inodes) { 1486 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1487 } else { 1488 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1489 } 1490 PetscFunctionReturn(0); 1491 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1492 PetscFunctionReturn(0); 1493 } 1494 } else if (isbinary) { 1495 if (size == 1) { 1496 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1497 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1498 } else { 1499 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } else if (isdraw) { 1503 PetscDraw draw; 1504 PetscBool isnull; 1505 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1506 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1507 } 1508 1509 if (size == 1) { 1510 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1511 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1512 } else { 1513 /* assemble the entire matrix onto first processor. 
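Note that this temporarily gathers a copy of the whole matrix onto process 0, so it is only sensible for small matrices; the guard just below therefore refuses matrices with more than 1024 rows unless -mat_ascii_output_large is given.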
*/ 1514 Mat A; 1515 Mat_SeqAIJ *Aloc; 1516 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1517 MatScalar *a; 1518 1519 if (mat->rmap->N > 1024) { 1520 PetscBool flg = PETSC_FALSE; 1521 1522 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1523 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1524 } 1525 1526 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1527 if (!rank) { 1528 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1529 } else { 1530 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1531 } 1532 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1533 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1534 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1535 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1536 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1537 1538 /* copy over the A part */ 1539 Aloc = (Mat_SeqAIJ*)aij->A->data; 1540 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1541 row = mat->rmap->rstart; 1542 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1543 for (i=0; i<m; i++) { 1544 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1545 row++; 1546 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1547 } 1548 aj = Aloc->j; 1549 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1550 1551 /* copy over the B part */ 1552 Aloc = (Mat_SeqAIJ*)aij->B->data; 1553 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1554 row = mat->rmap->rstart; 1555 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1556 ct = cols; 1557 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1558 for (i=0; i<m; i++) { 1559 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1560 row++; 1561 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1562 } 1563 ierr = PetscFree(ct);CHKERRQ(ierr); 1564 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1565 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 /* 1567 Everyone has to call to draw the matrix since the graphics waits are 1568 synchronized across all processors that share the PetscDraw object 1569 */ 1570 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1571 if (!rank) { 1572 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1573 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1574 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1575 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1576 } 1577 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1578 ierr = MatDestroy(&A);CHKERRQ(ierr); 1579 } 1580 PetscFunctionReturn(0); 1581 } 1582 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatView_MPIAIJ" 1585 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1586 { 1587 PetscErrorCode ierr; 1588 PetscBool iascii,isdraw,issocket,isbinary; 1589 1590 PetscFunctionBegin; 1591 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1592 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1595 if (iascii || isdraw || isbinary || issocket) { 1596 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1597 } 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatSOR_MPIAIJ" 1603 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1604 { 1605 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1606 PetscErrorCode ierr; 1607 Vec bb1 = 0; 1608 PetscBool hasop; 1609 1610 PetscFunctionBegin; 1611 if (flag == SOR_APPLY_UPPER) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1617 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1618 } 1619 1620 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1621 if (flag & SOR_ZERO_INITIAL_GUESS) { 1622 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1623 its--; 1624 } 1625 1626 while (its--) { 1627 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 1630 /* update rhs: bb1 = bb - B*x */ 1631 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1632 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1633 1634 /* local sweep */ 1635 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1636 } 1637 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1638 if (flag & SOR_ZERO_INITIAL_GUESS) { 1639 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1640 its--; 1641 } 1642 while (its--) { 1643 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 1646 /* update rhs: bb1 = bb - B*x */ 1647 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1648 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1649 1650 /* local sweep */ 1651 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1652 } 1653 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1654 if (flag & SOR_ZERO_INITIAL_GUESS) { 1655 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1656 its--; 1657 } 1658 while (its--) { 1659 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 1662 /* update rhs: bb1 = bb - B*x */ 1663 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1664 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1665 1666 /* local sweep */ 1667 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1668 } 1669 } else if (flag & SOR_EISENSTAT) { 1670 Vec xx1; 1671 1672 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1673 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1674 
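/* Eisenstat's trick: the zero-initial-guess local backward sweep above produced xx; now gather its off-process part into lvec, form the modified right-hand side bb1 = bb + ((omega-2)/omega)*D*xx + B*lvec, run a forward sweep on bb1 into xx1, and accumulate xx += xx1 */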
1675 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1676 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 if (!mat->diag) { 1678 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1679 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1680 } 1681 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1682 if (hasop) { 1683 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1684 } else { 1685 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1686 } 1687 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1688 1689 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1690 1691 /* local sweep */ 1692 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1693 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1694 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1695 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1696 1697 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatPermute_MPIAIJ" 1703 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1704 { 1705 Mat aA,aB,Aperm; 1706 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1707 PetscScalar *aa,*ba; 1708 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1709 PetscSF rowsf,sf; 1710 IS parcolp = NULL; 1711 PetscBool done; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1716 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1717 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1718 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1719 1720 /* Invert row permutation to find out where my rows should go */ 1721 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1722 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1723 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1724 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1725 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1726 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 1728 /* Invert column permutation to find out where my columns should go */ 1729 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1730 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1731 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1732 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1733 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1734 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1736 1737 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1738 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1739 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1740 1741 /* Find out where my gcols should go */ 1742 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1743 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1744 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1745 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1746 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1747 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1748 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1750 1751 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1752 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1753 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1756 for (i=0; i<m; i++) { 1757 PetscInt row = rdest[i],rowner; 1758 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1759 for (j=ai[i]; j<ai[i+1]; j++) { 1760 PetscInt cowner,col = cdest[aj[j]]; 1761 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1762 if (rowner == cowner) dnnz[i]++; 1763 else onnz[i]++; 1764 } 1765 for (j=bi[i]; j<bi[i+1]; j++) { 1766 PetscInt cowner,col = gcdest[bj[j]]; 1767 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1768 if (rowner == cowner) dnnz[i]++; 1769 else onnz[i]++; 1770 } 1771 } 1772 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1773 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1775 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1779 1780 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1781 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1783 for (i=0; i<m; i++) { 1784 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1785 PetscInt rowlen; 1786 rowlen = ai[i+1] - ai[i]; 1787 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1788 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1789 rowlen = bi[i+1] - bi[i]; 1790 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1791 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1792 } 1793 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1794 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1797 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1799 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1800 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1801 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1802 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1803 *B = Aperm; 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1809 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1810 { 1811 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1812 Mat A = mat->A,B = mat->B; 1813 PetscErrorCode ierr; 1814 PetscReal isend[5],irecv[5]; 1815 1816 PetscFunctionBegin; 1817 
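/* the five statistics (nz_used, nz_allocated, nz_unneeded, memory, mallocs) are summed over the diagonal block A and the off-diagonal block B, then combined according to flag: MAT_LOCAL keeps the per-process sums, while MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce them over the matrix's communicator. Caller-side usage sketch (illustrative only, not used in this routine): MatInfo info; ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"total nonzeros %g\n",info.nz_used);CHKERRQ(ierr); */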
info->block_size = 1.0; 1818 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1819 1820 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1821 isend[3] = info->memory; isend[4] = info->mallocs; 1822 1823 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1824 1825 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1826 isend[3] += info->memory; isend[4] += info->mallocs; 1827 if (flag == MAT_LOCAL) { 1828 info->nz_used = isend[0]; 1829 info->nz_allocated = isend[1]; 1830 info->nz_unneeded = isend[2]; 1831 info->memory = isend[3]; 1832 info->mallocs = isend[4]; 1833 } else if (flag == MAT_GLOBAL_MAX) { 1834 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1835 1836 info->nz_used = irecv[0]; 1837 info->nz_allocated = irecv[1]; 1838 info->nz_unneeded = irecv[2]; 1839 info->memory = irecv[3]; 1840 info->mallocs = irecv[4]; 1841 } else if (flag == MAT_GLOBAL_SUM) { 1842 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1843 1844 info->nz_used = irecv[0]; 1845 info->nz_allocated = irecv[1]; 1846 info->nz_unneeded = irecv[2]; 1847 info->memory = irecv[3]; 1848 info->mallocs = irecv[4]; 1849 } 1850 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1851 info->fill_ratio_needed = 0; 1852 info->factor_mallocs = 0; 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatSetOption_MPIAIJ" 1858 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1859 { 1860 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1861 PetscErrorCode ierr; 1862 1863 PetscFunctionBegin; 1864 switch (op) { 1865 case MAT_NEW_NONZERO_LOCATIONS: 1866 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1867 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1868 case MAT_KEEP_NONZERO_PATTERN: 1869 case MAT_NEW_NONZERO_LOCATION_ERR: 1870 case MAT_USE_INODES: 1871 case MAT_IGNORE_ZERO_ENTRIES: 1872 MatCheckPreallocated(A,1); 1873 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1874 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1875 break; 1876 case MAT_ROW_ORIENTED: 1877 a->roworiented = flg; 1878 1879 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1880 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1881 break; 1882 case MAT_NEW_DIAGONALS: 1883 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1884 break; 1885 case MAT_IGNORE_OFF_PROC_ENTRIES: 1886 a->donotstash = flg; 1887 break; 1888 case MAT_SPD: 1889 A->spd_set = PETSC_TRUE; 1890 A->spd = flg; 1891 if (flg) { 1892 A->symmetric = PETSC_TRUE; 1893 A->structurally_symmetric = PETSC_TRUE; 1894 A->symmetric_set = PETSC_TRUE; 1895 A->structurally_symmetric_set = PETSC_TRUE; 1896 } 1897 break; 1898 case MAT_SYMMETRIC: 1899 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1900 break; 1901 case MAT_STRUCTURALLY_SYMMETRIC: 1902 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1903 break; 1904 case MAT_HERMITIAN: 1905 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1906 break; 1907 case MAT_SYMMETRY_ETERNAL: 1908 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1909 break; 1910 default: 1911 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1912 } 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatGetRow_MPIAIJ" 1918 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1919 { 1920 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1921 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1922 PetscErrorCode ierr; 1923 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1924 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1925 PetscInt *cmap,*idx_p; 1926 1927 PetscFunctionBegin; 1928 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1929 mat->getrowactive = PETSC_TRUE; 1930 1931 if (!mat->rowvalues && (idx || v)) { 1932 /* 1933 allocate enough space to hold information from the longest row. 1934 */ 1935 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1936 PetscInt max = 1,tmp; 1937 for (i=0; i<matin->rmap->n; i++) { 1938 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1939 if (max < tmp) max = tmp; 1940 } 1941 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1942 } 1943 1944 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1945 lrow = row - rstart; 1946 1947 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1948 if (!v) {pvA = 0; pvB = 0;} 1949 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1950 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1951 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1952 nztot = nzA + nzB; 1953 1954 cmap = mat->garray; 1955 if (v || idx) { 1956 if (nztot) { 1957 /* Sort by increasing column numbers, assuming A and B already sorted */ 1958 PetscInt imark = -1; 1959 if (v) { 1960 *v = v_p = mat->rowvalues; 1961 for (i=0; i<nzB; i++) { 1962 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1963 else break; 1964 } 1965 imark = i; 1966 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1967 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1968 } 1969 if (idx) { 1970 *idx = idx_p = mat->rowindices; 1971 if (imark > -1) { 1972 for (i=0; i<imark; i++) { 1973 idx_p[i] = cmap[cworkB[i]]; 1974 } 1975 } else { 1976 for (i=0; i<nzB; i++) { 1977 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1978 else break; 1979 } 1980 imark = i; 1981 } 1982 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1983 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1984 } 1985 } else { 1986 if (idx) *idx = 0; 1987 if (v) *v = 0; 1988 } 1989 } 1990 *nz = nztot; 1991 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1992 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1998 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1999 { 2000 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2001 2002 PetscFunctionBegin; 2003 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2004 aij->getrowactive = PETSC_FALSE; 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatNorm_MPIAIJ" 2010 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2011 { 2012 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2013 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2014 PetscErrorCode ierr; 2015 PetscInt i,j,cstart = mat->cmap->rstart; 2016 PetscReal sum = 0.0; 2017 MatScalar *v; 2018 2019 PetscFunctionBegin; 2020 if (aij->size == 1) { 2021 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2022 } else { 2023 if (type == NORM_FROBENIUS) { 2024 v = 
amat->a; 2025 for (i=0; i<amat->nz; i++) { 2026 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2027 } 2028 v = bmat->a; 2029 for (i=0; i<bmat->nz; i++) { 2030 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2031 } 2032 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2033 *norm = PetscSqrtReal(*norm); 2034 } else if (type == NORM_1) { /* max column norm */ 2035 PetscReal *tmp,*tmp2; 2036 PetscInt *jj,*garray = aij->garray; 2037 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2039 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2040 *norm = 0.0; 2041 v = amat->a; jj = amat->j; 2042 for (j=0; j<amat->nz; j++) { 2043 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2044 } 2045 v = bmat->a; jj = bmat->j; 2046 for (j=0; j<bmat->nz; j++) { 2047 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2048 } 2049 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2050 for (j=0; j<mat->cmap->N; j++) { 2051 if (tmp2[j] > *norm) *norm = tmp2[j]; 2052 } 2053 ierr = PetscFree(tmp);CHKERRQ(ierr); 2054 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2055 } else if (type == NORM_INFINITY) { /* max row norm */ 2056 PetscReal ntemp = 0.0; 2057 for (j=0; j<aij->A->rmap->n; j++) { 2058 v = amat->a + amat->i[j]; 2059 sum = 0.0; 2060 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2061 sum += PetscAbsScalar(*v); v++; 2062 } 2063 v = bmat->a + bmat->i[j]; 2064 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2065 sum += PetscAbsScalar(*v); v++; 2066 } 2067 if (sum > ntemp) ntemp = sum; 2068 } 2069 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2070 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatTranspose_MPIAIJ" 2077 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2078 { 2079 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2080 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2081 PetscErrorCode ierr; 2082 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2083 PetscInt cstart = A->cmap->rstart,ncol; 2084 Mat B; 2085 MatScalar *array; 2086 2087 PetscFunctionBegin; 2088 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2089 2090 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2091 ai = Aloc->i; aj = Aloc->j; 2092 bi = Bloc->i; bj = Bloc->j; 2093 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2094 PetscInt *d_nnz,*g_nnz,*o_nnz; 2095 PetscSFNode *oloc; 2096 PETSC_UNUSED PetscSF sf; 2097 2098 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2099 /* compute d_nnz for preallocation */ 2100 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2101 for (i=0; i<ai[ma]; i++) { 2102 d_nnz[aj[i]]++; 2103 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2104 } 2105 /* compute local off-diagonal contributions */ 2106 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2107 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2108 /* map those to global */ 2109 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2110 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2111 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2112 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2113 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2114 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2116 2117 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2118 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2119 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2120 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2121 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2122 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2123 } else { 2124 B = *matout; 2125 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2126 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2127 } 2128 2129 /* copy over the A part */ 2130 array = Aloc->a; 2131 row = A->rmap->rstart; 2132 for (i=0; i<ma; i++) { 2133 ncol = ai[i+1]-ai[i]; 2134 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2135 row++; 2136 array += ncol; aj += ncol; 2137 } 2138 aj = Aloc->j; 2139 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2140 2141 /* copy over the B part */ 2142 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2143 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2144 array = Bloc->a; 2145 row = A->rmap->rstart; 2146 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2147 cols_tmp = cols; 2148 for (i=0; i<mb; i++) { 2149 ncol = bi[i+1]-bi[i]; 2150 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2151 row++; 2152 array += ncol; cols_tmp += ncol; 2153 } 2154 ierr = PetscFree(cols);CHKERRQ(ierr); 2155 2156 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2159 *matout = B; 2160 } else { 2161 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2168 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2169 { 2170 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2171 Mat a = aij->A,b = aij->B; 2172 PetscErrorCode ierr; 2173 PetscInt s1,s2,s3; 2174 2175 PetscFunctionBegin; 2176 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2177 if (rr) { 2178 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2179 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2180 /* Overlap communication with computation. 
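The scatter of rr into lvec is started here so that the left scaling of the off-diagonal block and the scaling of the diagonal block can proceed while the communication is in flight; the off-diagonal block is right-scaled only after VecScatterEnd() below.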
*/ 2181 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2182 } 2183 if (ll) { 2184 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2185 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2186 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2187 } 2188 /* scale the diagonal block */ 2189 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2190 2191 if (rr) { 2192 /* Do a scatter end and then right scale the off-diagonal block */ 2193 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2194 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2201 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2208 PetscFunctionReturn(0); 2209 } 2210 2211 #undef __FUNCT__ 2212 #define __FUNCT__ "MatEqual_MPIAIJ" 2213 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2214 { 2215 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2216 Mat a,b,c,d; 2217 PetscBool flg; 2218 PetscErrorCode ierr; 2219 2220 PetscFunctionBegin; 2221 a = matA->A; b = matA->B; 2222 c = matB->A; d = matB->B; 2223 2224 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2225 if (flg) { 2226 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2227 } 2228 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatCopy_MPIAIJ" 2234 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2235 { 2236 PetscErrorCode ierr; 2237 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2238 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2239 2240 PetscFunctionBegin; 2241 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2242 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2243 /* because of the column compression in the off-processor part of the matrix a->B, 2244 the number of columns in a->B and b->B may be different, hence we cannot call 2245 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2246 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2247 then copying the submatrices */ 2248 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2249 } else { 2250 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2251 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatSetUp_MPIAIJ" 2258 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2259 { 2260 PetscErrorCode ierr; 2261 2262 PetscFunctionBegin; 2263 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2269 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2270 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2271 { 2272 PetscInt i,m=Y->rmap->N; 2273 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2274 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2275 const PetscInt *xi = x->i,*yi = y->i; 2276 2277 PetscFunctionBegin; 2278 /* Set the number of nonzeros in the new matrix */ 2279 for (i=0; i<m; i++) { 2280 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2281 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2282 nnz[i] = 0; 2283 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2284 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2285 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2286 nnz[i]++; 2287 } 2288 for (; k<nzy; k++) nnz[i]++; 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatAXPY_MPIAIJ" 2295 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2296 { 2297 PetscErrorCode ierr; 2298 PetscInt i; 2299 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2300 PetscBLASInt bnz,one=1; 2301 Mat_SeqAIJ *x,*y; 2302 2303 PetscFunctionBegin; 2304 if (str == SAME_NONZERO_PATTERN) { 2305 PetscScalar alpha = a; 2306 x = (Mat_SeqAIJ*)xx->A->data; 2307 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2308 y = (Mat_SeqAIJ*)yy->A->data; 2309 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2310 x = (Mat_SeqAIJ*)xx->B->data; 2311 y = (Mat_SeqAIJ*)yy->B->data; 2312 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2313 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2314 } else if (str == SUBSET_NONZERO_PATTERN) { 2315 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2316 2317 x = (Mat_SeqAIJ*)xx->B->data; 2318 y = (Mat_SeqAIJ*)yy->B->data; 2319 if (y->xtoy && y->XtoY != xx->B) { 2320 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2321 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2322 } 2323 if (!y->xtoy) { /* get xtoy */ 2324 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2325 y->XtoY = xx->B; 2326 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2327 } 2328 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2329 } else { 2330 Mat B; 2331 PetscInt *nnz_d,*nnz_o; 2332 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2333 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2334 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2335 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2336 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2337 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2338 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2339 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2341 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2342 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2343 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2344 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2346 } 2347 PetscFunctionReturn(0); 2348 } 2349 2350 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2351 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "MatConjugate_MPIAIJ" 2354 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2355 { 2356 #if defined(PETSC_USE_COMPLEX) 2357 PetscErrorCode ierr; 2358 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2359 2360 PetscFunctionBegin; 2361 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2362 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2363 #else 2364 PetscFunctionBegin; 2365 #endif 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatRealPart_MPIAIJ" 2371 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2372 { 2373 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2374 PetscErrorCode ierr; 2375 2376 PetscFunctionBegin; 2377 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2378 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2384 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2385 { 2386 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2387 PetscErrorCode ierr; 2388 2389 PetscFunctionBegin; 2390 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2391 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #if defined(PETSC_HAVE_PBGL) 2396 2397 #include <boost/parallel/mpi/bsp_process_group.hpp> 2398 #include <boost/graph/distributed/ilu_default_graph.hpp> 2399 #include <boost/graph/distributed/ilu_0_block.hpp> 2400 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2401 #include <boost/graph/distributed/petsc/interface.hpp> 2402 #include <boost/multi_array.hpp> 2403 #include <boost/parallel/distributed_property_map.hpp> 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2407 /* 2408 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2409 */ 2410 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2411 { 2412 namespace petsc = boost::distributed::petsc; 2413 2414 namespace graph_dist = boost::graph::distributed; 2415 using boost::graph::distributed::ilu_default::process_group_type; 2416 using boost::graph::ilu_permuted; 2417 2418 PetscBool row_identity, col_identity; 2419 PetscContainer c; 2420 PetscInt m, n, M, N; 2421 PetscErrorCode ierr; 2422 2423 PetscFunctionBegin; 2424 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2425 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2426 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2427 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2428 2429 process_group_type pg; 2430 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2431 lgraph_type
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2432 lgraph_type& level_graph = *lgraph_p; 2433 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2434 2435 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2436 ilu_permuted(level_graph); 2437 2438 /* put together the new matrix */ 2439 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2440 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2441 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2442 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2443 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2444 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2445 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2446 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 2448 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr); 2449 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2450 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2451 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 #undef __FUNCT__ 2456 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2457 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2458 { 2459 PetscFunctionBegin; 2460 PetscFunctionReturn(0); 2461 } 2462 2463 #undef __FUNCT__ 2464 #define __FUNCT__ "MatSolve_MPIAIJ" 2465 /* 2466 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2467 */ 2468 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2469 { 2470 namespace graph_dist = boost::graph::distributed; 2471 2472 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2473 lgraph_type *lgraph_p; 2474 PetscContainer c; 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2479 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2480 ierr = VecCopy(b, x);CHKERRQ(ierr); 2481 2482 PetscScalar *array_x; 2483 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2484 PetscInt sx; 2485 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2486 2487 PetscScalar *array_b; 2488 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2489 PetscInt sb; 2490 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2491 2492 lgraph_type& level_graph = *lgraph_p; 2493 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2494 2495 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2496 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2497 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2498 2499 typedef boost::iterator_property_map<array_ref_type::iterator, 2500 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2501 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2502 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2503 2504 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2505 PetscFunctionReturn(0); 2506 } 2507 #endif 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "MatDestroy_MatRedundant" 2511 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2512 { 2513 PetscErrorCode ierr; 2514 Mat_Redundant *redund; 2515 PetscInt i; 2516 PetscMPIInt size; 2517 2518 PetscFunctionBegin; 2519 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2520 if (size == 1) { 2521 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2522 redund =
a->redundant; 2523 } else { 2524 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2525 redund = a->redundant; 2526 } 2527 if (redund){ 2528 if (redund->matseq) { /* via MatGetSubMatrices() */ 2529 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2530 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2531 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2532 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2533 } else { 2534 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2535 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2536 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2537 for (i=0; i<redund->nrecvs; i++) { 2538 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2539 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2540 } 2541 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2542 } 2543 2544 if (redund->psubcomm) { 2545 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2546 } 2547 ierr = redund->Destroy(A);CHKERRQ(ierr); 2548 ierr = PetscFree(redund);CHKERRQ(ierr); 2549 } 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2555 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2556 { 2557 PetscMPIInt rank,size; 2558 MPI_Comm comm; 2559 PetscErrorCode ierr; 2560 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2561 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2562 PetscInt *rowrange = mat->rmap->range; 2563 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2564 Mat A = aij->A,B=aij->B,C=*matredundant; 2565 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2566 PetscScalar *sbuf_a; 2567 PetscInt nzlocal=a->nz+b->nz; 2568 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2569 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2570 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2571 MatScalar *aworkA,*aworkB; 2572 PetscScalar *vals; 2573 PetscMPIInt tag1,tag2,tag3,imdex; 2574 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2575 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2576 MPI_Status recv_status,*send_status; 2577 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2578 PetscInt **rbuf_j=NULL; 2579 PetscScalar **rbuf_a=NULL; 2580 Mat_Redundant *redund =NULL; 2581 2582 PetscFunctionBegin; 2583 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2584 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2585 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2586 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2587 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2588 2589 if (reuse == MAT_REUSE_MATRIX) { 2590 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2591 if (subsize == 1) { 2592 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2593 redund = c->redundant; 2594 } else { 2595 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2596 redund = c->redundant; 2597 } 2598 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2599 2600 nsends = redund->nsends; 2601 nrecvs = redund->nrecvs; 2602 send_rank = redund->send_rank; 2603 recv_rank = redund->recv_rank; 2604 sbuf_nz = redund->sbuf_nz; 2605 rbuf_nz = redund->rbuf_nz; 2606 sbuf_j = redund->sbuf_j; 2607 sbuf_a = redund->sbuf_a; 2608 rbuf_j = redund->rbuf_j; 2609 rbuf_a = redund->rbuf_a; 2610 } 2611 2612 if (reuse == MAT_INITIAL_MATRIX) { 2613 PetscInt nleftover,np_subcomm; 2614 2615 /* get the destination processors' id send_rank, nsends and nrecvs */ 2616 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2617 2618 np_subcomm = size/nsubcomm; 2619 nleftover = size - nsubcomm*np_subcomm; 2620 2621 /* block of codes below is specific for INTERLACED */ 2622 /* ------------------------------------------------*/ 2623 nsends = 0; nrecvs = 0; 2624 for (i=0; i<size; i++) { 2625 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2626 send_rank[nsends++] = i; 2627 recv_rank[nrecvs++] = i; 2628 } 2629 } 2630 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2631 i = size-nleftover-1; 2632 j = 0; 2633 while (j < nsubcomm - nleftover) { 2634 send_rank[nsends++] = i; 2635 i--; j++; 2636 } 2637 } 2638 2639 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2640 for (i=0; i<nleftover; i++) { 2641 recv_rank[nrecvs++] = size-nleftover+i; 2642 } 2643 } 2644 /*----------------------------------------------*/ 2645 2646 /* allocate sbuf_j, sbuf_a */ 2647 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2648 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2649 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2650 /* 2651 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2652 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 2653 */ 2654 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2655 2656 /* copy mat's local entries into the buffers */ 2657 if (reuse == MAT_INITIAL_MATRIX) { 2658 rownz_max = 0; 2659 rptr = sbuf_j; 2660 cols = sbuf_j + rend-rstart + 1; 2661 vals = sbuf_a; 2662 rptr[0] = 0; 2663 for (i=0; i<rend-rstart; i++) { 2664 row = i + rstart; 2665 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2666 ncols = nzA + nzB; 2667 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2668 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2669 /* load the column indices for this row into cols */ 2670 lwrite = 0; 2671 for (l=0; l<nzB; l++) { 2672 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2673 vals[lwrite] = aworkB[l]; 2674 cols[lwrite++] = ctmp; 2675 } 2676 } 2677 for (l=0; l<nzA; l++) { 2678 vals[lwrite] = aworkA[l]; 2679 cols[lwrite++] = cstart + cworkA[l]; 2680 } 2681 for (l=0; l<nzB; l++) { 2682 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2683 vals[lwrite] = aworkB[l]; 2684 cols[lwrite++] = ctmp; 2685 } 2686 } 2687 vals += ncols; 2688 cols += ncols; 2689 rptr[i+1] = rptr[i] + ncols; 2690 if (rownz_max < ncols) rownz_max = ncols; 2691 } 2692 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2693 } else { /* only copy matrix values into sbuf_a */ 2694 rptr = sbuf_j; 2695 vals = sbuf_a; 2696 rptr[0] = 0; 2697 for (i=0; i<rend-rstart; i++) { 2698 row = i + rstart; 2699 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2700 ncols = nzA + nzB; 2701 cworkB = b->j + b->i[i]; 2702 aworkA = a->a + a->i[i]; 2703 aworkB = 
b->a + b->i[i]; 2704 lwrite = 0; 2705 for (l=0; l<nzB; l++) { 2706 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2707 } 2708 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2709 for (l=0; l<nzB; l++) { 2710 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2711 } 2712 vals += ncols; 2713 rptr[i+1] = rptr[i] + ncols; 2714 } 2715 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2716 2717 /* send nzlocal to others, and recv other's nzlocal */ 2718 /*--------------------------------------------------*/ 2719 if (reuse == MAT_INITIAL_MATRIX) { 2720 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2721 2722 s_waits2 = s_waits3 + nsends; 2723 s_waits1 = s_waits2 + nsends; 2724 r_waits1 = s_waits1 + nsends; 2725 r_waits2 = r_waits1 + nrecvs; 2726 r_waits3 = r_waits2 + nrecvs; 2727 } else { 2728 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2729 2730 r_waits3 = s_waits3 + nsends; 2731 } 2732 2733 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2734 if (reuse == MAT_INITIAL_MATRIX) { 2735 /* get new tags to keep the communication clean */ 2736 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2737 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2738 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2739 2740 /* post receives of other's nzlocal */ 2741 for (i=0; i<nrecvs; i++) { 2742 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2743 } 2744 /* send nzlocal to others */ 2745 for (i=0; i<nsends; i++) { 2746 sbuf_nz[i] = nzlocal; 2747 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2748 } 2749 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2750 count = nrecvs; 2751 while (count) { 2752 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2753 2754 recv_rank[imdex] = recv_status.MPI_SOURCE; 2755 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2756 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2757 2758 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2759 2760 rbuf_nz[imdex] += i + 2; 2761 2762 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2763 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2764 count--; 2765 } 2766 /* wait on sends of nzlocal */ 2767 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2768 /* send mat->i,j to others, and recv from other's */ 2769 /*------------------------------------------------*/ 2770 for (i=0; i<nsends; i++) { 2771 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2772 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2773 } 2774 /* wait on receives of mat->i,j */ 2775 /*------------------------------*/ 2776 count = nrecvs; 2777 while (count) { 2778 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2779 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2780 count--; 2781 } 2782 /* wait on sends of mat->i,j */ 2783 
/*---------------------------*/ 2784 if (nsends) { 2785 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2786 } 2787 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2788 2789 /* post receives, send and receive mat->a */ 2790 /*----------------------------------------*/ 2791 for (imdex=0; imdex<nrecvs; imdex++) { 2792 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2793 } 2794 for (i=0; i<nsends; i++) { 2795 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2796 } 2797 count = nrecvs; 2798 while (count) { 2799 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2800 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2801 count--; 2802 } 2803 if (nsends) { 2804 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2805 } 2806 2807 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2808 2809 /* create redundant matrix */ 2810 /*-------------------------*/ 2811 if (reuse == MAT_INITIAL_MATRIX) { 2812 const PetscInt *range; 2813 PetscInt rstart_sub,rend_sub,mloc_sub; 2814 2815 /* compute rownz_max for preallocation */ 2816 for (imdex=0; imdex<nrecvs; imdex++) { 2817 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2818 rptr = rbuf_j[imdex]; 2819 for (i=0; i<j; i++) { 2820 ncols = rptr[i+1] - rptr[i]; 2821 if (rownz_max < ncols) rownz_max = ncols; 2822 } 2823 } 2824 2825 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2826 2827 /* get local size of redundant matrix 2828 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2829 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2830 rstart_sub = range[nsubcomm*subrank]; 2831 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2832 rend_sub = range[nsubcomm*(subrank+1)]; 2833 } else { 2834 rend_sub = mat->rmap->N; 2835 } 2836 mloc_sub = rend_sub - rstart_sub; 2837 2838 if (M == N) { 2839 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2840 } else { /* non-square matrix */ 2841 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2842 } 2843 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2844 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2845 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2846 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2847 } else { 2848 C = *matredundant; 2849 } 2850 2851 /* insert local matrix entries */ 2852 rptr = sbuf_j; 2853 cols = sbuf_j + rend-rstart + 1; 2854 vals = sbuf_a; 2855 for (i=0; i<rend-rstart; i++) { 2856 row = i + rstart; 2857 ncols = rptr[i+1] - rptr[i]; 2858 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2859 vals += ncols; 2860 cols += ncols; 2861 } 2862 /* insert received matrix entries */ 2863 for (imdex=0; imdex<nrecvs; imdex++) { 2864 rstart = rowrange[recv_rank[imdex]]; 2865 rend = rowrange[recv_rank[imdex]+1]; 2866 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2867 rptr = rbuf_j[imdex]; 2868 cols = rbuf_j[imdex] + rend-rstart + 1; 2869 vals = rbuf_a[imdex]; 2870 for (i=0; i<rend-rstart; i++) { 2871 row = i + rstart; 2872 ncols = rptr[i+1] - rptr[i]; 2873 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2874 vals += ncols; 2875 cols += ncols; 2876 } 2877 } 2878 ierr = 
MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 2881 if (reuse == MAT_INITIAL_MATRIX) { 2882 *matredundant = C; 2883 2884 /* create a supporting struct and attach it to C for reuse */ 2885 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2886 if (subsize == 1) { 2887 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2888 c->redundant = redund; 2889 } else { 2890 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2891 c->redundant = redund; 2892 } 2893 2894 redund->nzlocal = nzlocal; 2895 redund->nsends = nsends; 2896 redund->nrecvs = nrecvs; 2897 redund->send_rank = send_rank; 2898 redund->recv_rank = recv_rank; 2899 redund->sbuf_nz = sbuf_nz; 2900 redund->rbuf_nz = rbuf_nz; 2901 redund->sbuf_j = sbuf_j; 2902 redund->sbuf_a = sbuf_a; 2903 redund->rbuf_j = rbuf_j; 2904 redund->rbuf_a = rbuf_a; 2905 redund->psubcomm = NULL; 2906 2907 redund->Destroy = C->ops->destroy; 2908 C->ops->destroy = MatDestroy_MatRedundant; 2909 } 2910 PetscFunctionReturn(0); 2911 } 2912 2913 #undef __FUNCT__ 2914 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2915 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2916 { 2917 PetscErrorCode ierr; 2918 MPI_Comm comm; 2919 PetscMPIInt size,subsize; 2920 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2921 Mat_Redundant *redund=NULL; 2922 PetscSubcomm psubcomm=NULL; 2923 MPI_Comm subcomm_in=subcomm; 2924 Mat *matseq; 2925 IS isrow,iscol; 2926 2927 PetscFunctionBegin; 2928 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2929 if (reuse == MAT_INITIAL_MATRIX) { 2930 /* create psubcomm, then get subcomm */ 2931 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2932 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2933 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size); 2934 2935 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2936 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2937 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2938 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2939 subcomm = psubcomm->comm; 2940 } else { /* retrieve psubcomm and subcomm */ 2941 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2942 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2943 if (subsize == 1) { 2944 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2945 redund = c->redundant; 2946 } else { 2947 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2948 redund = c->redundant; 2949 } 2950 psubcomm = redund->psubcomm; 2951 } 2952 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2953 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2954 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2955 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2956 if (subsize == 1) { 2957 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2958 c->redundant->psubcomm = psubcomm; 2959 } else { 2960 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2961 c->redundant->psubcomm = psubcomm; 2962 } 2963 } 2964 PetscFunctionReturn(0); 2965 } 2966 } 2967 2968 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2969 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2970 
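/* MAT_INITIAL_MATRIX: build index sets selecting this subcomm's share of the rows and all columns, and gather them into a sequential matrix below; MAT_REUSE_MATRIX: retrieve the index sets and sequential matrix cached in the Mat_Redundant context attached to *matredundant */ 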
if (reuse == MAT_INITIAL_MATRIX) { 2971 /* create a local sequential matrix matseq[0] */ 2972 mloc_sub = PETSC_DECIDE; 2973 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2974 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2975 rstart = rend - mloc_sub; 2976 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2977 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2978 } else { /* reuse == MAT_REUSE_MATRIX */ 2979 if (subsize == 1) { 2980 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2981 redund = c->redundant; 2982 } else { 2983 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2984 redund = c->redundant; 2985 } 2986 2987 isrow = redund->isrow; 2988 iscol = redund->iscol; 2989 matseq = redund->matseq; 2990 } 2991 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2992 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2993 2994 if (reuse == MAT_INITIAL_MATRIX) { 2995 /* create a supporting struct and attach it to C for reuse */ 2996 ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr); 2997 if (subsize == 1) { 2998 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2999 c->redundant = redund; 3000 } else { 3001 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3002 c->redundant = redund; 3003 } 3004 redund->isrow = isrow; 3005 redund->iscol = iscol; 3006 redund->matseq = matseq; 3007 redund->psubcomm = psubcomm; 3008 redund->Destroy = (*matredundant)->ops->destroy; 3009 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 3010 } 3011 PetscFunctionReturn(0); 3012 } 3013 3014 #undef __FUNCT__ 3015 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 3016 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3017 { 3018 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3019 PetscErrorCode ierr; 3020 PetscInt i,*idxb = 0; 3021 PetscScalar *va,*vb; 3022 Vec vtmp; 3023 3024 PetscFunctionBegin; 3025 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 3026 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3027 if (idx) { 3028 for (i=0; i<A->rmap->n; i++) { 3029 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3030 } 3031 } 3032 3033 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3034 if (idx) { 3035 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3036 } 3037 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3038 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3039 3040 for (i=0; i<A->rmap->n; i++) { 3041 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 3042 va[i] = vb[i]; 3043 if (idx) idx[i] = a->garray[idxb[i]]; 3044 } 3045 } 3046 3047 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3048 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3049 ierr = PetscFree(idxb);CHKERRQ(ierr); 3050 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3051 PetscFunctionReturn(0); 3052 } 3053 3054 #undef __FUNCT__ 3055 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 3056 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3057 { 3058 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3059 PetscErrorCode ierr; 3060 PetscInt i,*idxb = 0; 3061 PetscScalar *va,*vb; 3062 Vec vtmp; 3063 3064 PetscFunctionBegin; 3065 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 3066 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3067 if (idx) { 3068 for (i=0; i<A->rmap->n; i++) { 3069 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3070 } 3071 } 3072 3073 ierr = 
VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3074 if (idx) { 3075 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3076 } 3077 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3078 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3079 3080 for (i=0; i<A->rmap->n; i++) { 3081 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 3082 va[i] = vb[i]; 3083 if (idx) idx[i] = a->garray[idxb[i]]; 3084 } 3085 } 3086 3087 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3088 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3089 ierr = PetscFree(idxb);CHKERRQ(ierr); 3090 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3091 PetscFunctionReturn(0); 3092 } 3093 3094 #undef __FUNCT__ 3095 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 3096 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3097 { 3098 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3099 PetscInt n = A->rmap->n; 3100 PetscInt cstart = A->cmap->rstart; 3101 PetscInt *cmap = mat->garray; 3102 PetscInt *diagIdx, *offdiagIdx; 3103 Vec diagV, offdiagV; 3104 PetscScalar *a, *diagA, *offdiagA; 3105 PetscInt r; 3106 PetscErrorCode ierr; 3107 3108 PetscFunctionBegin; 3109 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3110 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3111 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3112 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3113 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3114 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3115 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3116 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3117 for (r = 0; r < n; ++r) { 3118 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3119 a[r] = diagA[r]; 3120 idx[r] = cstart + diagIdx[r]; 3121 } else { 3122 a[r] = offdiagA[r]; 3123 idx[r] = cmap[offdiagIdx[r]]; 3124 } 3125 } 3126 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3127 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3128 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3129 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3130 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3131 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3132 PetscFunctionReturn(0); 3133 } 3134 3135 #undef __FUNCT__ 3136 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3137 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3138 { 3139 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3140 PetscInt n = A->rmap->n; 3141 PetscInt cstart = A->cmap->rstart; 3142 PetscInt *cmap = mat->garray; 3143 PetscInt *diagIdx, *offdiagIdx; 3144 Vec diagV, offdiagV; 3145 PetscScalar *a, *diagA, *offdiagA; 3146 PetscInt r; 3147 PetscErrorCode ierr; 3148 3149 PetscFunctionBegin; 3150 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3151 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3152 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3153 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3154 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3155 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3156 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3157 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3158 for (r = 0; r < n; ++r) { 3159 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3160 a[r] = diagA[r]; 3161 idx[r] = cstart + diagIdx[r]; 3162 } else { 3163 a[r] = offdiagA[r]; 3164 idx[r] = cmap[offdiagIdx[r]]; 3165 } 3166 } 3167 ierr = 
VecRestoreArray(v, &a);CHKERRQ(ierr); 3168 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3169 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3170 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3171 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3172 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3173 PetscFunctionReturn(0); 3174 } 3175 3176 #undef __FUNCT__ 3177 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3178 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3179 { 3180 PetscErrorCode ierr; 3181 Mat *dummy; 3182 3183 PetscFunctionBegin; 3184 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3185 *newmat = *dummy; 3186 ierr = PetscFree(dummy);CHKERRQ(ierr); 3187 PetscFunctionReturn(0); 3188 } 3189 3190 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3191 3192 #undef __FUNCT__ 3193 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3194 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3195 { 3196 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3197 PetscErrorCode ierr; 3198 3199 PetscFunctionBegin; 3200 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3201 PetscFunctionReturn(0); 3202 } 3203 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3206 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3207 { 3208 PetscErrorCode ierr; 3209 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3210 3211 PetscFunctionBegin; 3212 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3213 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3214 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3215 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3216 PetscFunctionReturn(0); 3217 } 3218 3219 /* -------------------------------------------------------------------*/ 3220 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3221 MatGetRow_MPIAIJ, 3222 MatRestoreRow_MPIAIJ, 3223 MatMult_MPIAIJ, 3224 /* 4*/ MatMultAdd_MPIAIJ, 3225 MatMultTranspose_MPIAIJ, 3226 MatMultTransposeAdd_MPIAIJ, 3227 #if defined(PETSC_HAVE_PBGL) 3228 MatSolve_MPIAIJ, 3229 #else 3230 0, 3231 #endif 3232 0, 3233 0, 3234 /*10*/ 0, 3235 0, 3236 0, 3237 MatSOR_MPIAIJ, 3238 MatTranspose_MPIAIJ, 3239 /*15*/ MatGetInfo_MPIAIJ, 3240 MatEqual_MPIAIJ, 3241 MatGetDiagonal_MPIAIJ, 3242 MatDiagonalScale_MPIAIJ, 3243 MatNorm_MPIAIJ, 3244 /*20*/ MatAssemblyBegin_MPIAIJ, 3245 MatAssemblyEnd_MPIAIJ, 3246 MatSetOption_MPIAIJ, 3247 MatZeroEntries_MPIAIJ, 3248 /*24*/ MatZeroRows_MPIAIJ, 3249 0, 3250 #if defined(PETSC_HAVE_PBGL) 3251 0, 3252 #else 3253 0, 3254 #endif 3255 0, 3256 0, 3257 /*29*/ MatSetUp_MPIAIJ, 3258 #if defined(PETSC_HAVE_PBGL) 3259 0, 3260 #else 3261 0, 3262 #endif 3263 0, 3264 0, 3265 0, 3266 /*34*/ MatDuplicate_MPIAIJ, 3267 0, 3268 0, 3269 0, 3270 0, 3271 /*39*/ MatAXPY_MPIAIJ, 3272 MatGetSubMatrices_MPIAIJ, 3273 MatIncreaseOverlap_MPIAIJ, 3274 MatGetValues_MPIAIJ, 3275 MatCopy_MPIAIJ, 3276 /*44*/ MatGetRowMax_MPIAIJ, 3277 MatScale_MPIAIJ, 3278 0, 3279 0, 3280 MatZeroRowsColumns_MPIAIJ, 3281 /*49*/ MatSetRandom_MPIAIJ, 3282 0, 3283 0, 3284 0, 3285 0, 3286 /*54*/ MatFDColoringCreate_MPIAIJ, 3287 0, 3288 MatSetUnfactored_MPIAIJ, 3289 MatPermute_MPIAIJ, 3290 0, 3291 /*59*/ MatGetSubMatrix_MPIAIJ, 3292 MatDestroy_MPIAIJ, 3293 MatView_MPIAIJ, 3294 0, 3295 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3296 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3297 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3298 0, 3299 0, 3300 0, 3301 /*69*/ 
MatGetRowMaxAbs_MPIAIJ, 3302 MatGetRowMinAbs_MPIAIJ, 3303 0, 3304 MatSetColoring_MPIAIJ, 3305 0, 3306 MatSetValuesAdifor_MPIAIJ, 3307 /*75*/ MatFDColoringApply_AIJ, 3308 0, 3309 0, 3310 0, 3311 MatFindZeroDiagonals_MPIAIJ, 3312 /*80*/ 0, 3313 0, 3314 0, 3315 /*83*/ MatLoad_MPIAIJ, 3316 0, 3317 0, 3318 0, 3319 0, 3320 0, 3321 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3322 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3323 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3324 MatPtAP_MPIAIJ_MPIAIJ, 3325 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3326 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3327 0, 3328 0, 3329 0, 3330 0, 3331 /*99*/ 0, 3332 0, 3333 0, 3334 MatConjugate_MPIAIJ, 3335 0, 3336 /*104*/MatSetValuesRow_MPIAIJ, 3337 MatRealPart_MPIAIJ, 3338 MatImaginaryPart_MPIAIJ, 3339 0, 3340 0, 3341 /*109*/0, 3342 MatGetRedundantMatrix_MPIAIJ, 3343 MatGetRowMin_MPIAIJ, 3344 0, 3345 0, 3346 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3347 0, 3348 0, 3349 0, 3350 0, 3351 /*119*/0, 3352 0, 3353 0, 3354 0, 3355 MatGetMultiProcBlock_MPIAIJ, 3356 /*124*/MatFindNonzeroRows_MPIAIJ, 3357 MatGetColumnNorms_MPIAIJ, 3358 MatInvertBlockDiagonal_MPIAIJ, 3359 0, 3360 MatGetSubMatricesParallel_MPIAIJ, 3361 /*129*/0, 3362 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3363 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3364 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3365 0, 3366 /*134*/0, 3367 0, 3368 0, 3369 0, 3370 0, 3371 /*139*/0, 3372 0, 3373 0 3374 }; 3375 3376 /* ----------------------------------------------------------------------------------------*/ 3377 3378 #undef __FUNCT__ 3379 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3380 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3381 { 3382 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3383 PetscErrorCode ierr; 3384 3385 PetscFunctionBegin; 3386 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3387 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3388 PetscFunctionReturn(0); 3389 } 3390 3391 #undef __FUNCT__ 3392 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3393 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3394 { 3395 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3396 PetscErrorCode ierr; 3397 3398 PetscFunctionBegin; 3399 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3400 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3401 PetscFunctionReturn(0); 3402 } 3403 3404 #undef __FUNCT__ 3405 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3406 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3407 { 3408 Mat_MPIAIJ *b; 3409 PetscErrorCode ierr; 3410 3411 PetscFunctionBegin; 3412 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3413 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3414 b = (Mat_MPIAIJ*)B->data; 3415 3416 if (!B->preallocated) { 3417 /* Explicitly create 2 MATSEQAIJ matrices. 
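b->A holds the diagonal block (local rows by local columns); b->B holds the off-diagonal block, whose columns are compressed at assembly time and mapped back to global indices through garray. 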
*/ 3418 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3419 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3420 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3421 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3422 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3423 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3424 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3425 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3426 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3427 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3428 } 3429 3430 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3431 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3432 B->preallocated = PETSC_TRUE; 3433 PetscFunctionReturn(0); 3434 } 3435 3436 #undef __FUNCT__ 3437 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3438 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3439 { 3440 Mat mat; 3441 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3442 PetscErrorCode ierr; 3443 3444 PetscFunctionBegin; 3445 *newmat = 0; 3446 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3447 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3448 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3449 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3450 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3451 a = (Mat_MPIAIJ*)mat->data; 3452 3453 mat->factortype = matin->factortype; 3454 mat->rmap->bs = matin->rmap->bs; 3455 mat->cmap->bs = matin->cmap->bs; 3456 mat->assembled = PETSC_TRUE; 3457 mat->insertmode = NOT_SET_VALUES; 3458 mat->preallocated = PETSC_TRUE; 3459 3460 a->size = oldmat->size; 3461 a->rank = oldmat->rank; 3462 a->donotstash = oldmat->donotstash; 3463 a->roworiented = oldmat->roworiented; 3464 a->rowindices = 0; 3465 a->rowvalues = 0; 3466 a->getrowactive = PETSC_FALSE; 3467 3468 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3469 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3470 3471 if (oldmat->colmap) { 3472 #if defined(PETSC_USE_CTABLE) 3473 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3474 #else 3475 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3476 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3477 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3478 #endif 3479 } else a->colmap = 0; 3480 if (oldmat->garray) { 3481 PetscInt len; 3482 len = oldmat->B->cmap->n; 3483 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3484 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3485 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3486 } else a->garray = 0; 3487 3488 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3489 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3490 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3491 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3492 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3493 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3494 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3495 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3496 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3497 *newmat = mat; 3498 PetscFunctionReturn(0); 3499 } 3500 3501 3502 3503 #undef __FUNCT__ 3504 #define __FUNCT__ "MatLoad_MPIAIJ" 3505 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3506 { 3507 PetscScalar *vals,*svals; 3508 MPI_Comm comm; 3509 PetscErrorCode ierr; 3510 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3511 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3512 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3513 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3514 PetscInt cend,cstart,n,*rowners,sizesset=1; 3515 int fd; 3516 PetscInt bs = 1; 3517 3518 PetscFunctionBegin; 3519 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3520 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3521 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3522 if (!rank) { 3523 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3524 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3525 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 3526 } 3527 3528 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 3529 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3530 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3531 3532 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3533 3534 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3535 M = header[1]; N = header[2]; 3536 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3537 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3538 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3539 3540 /* If global sizes are set, check if they are consistent with that given in the file */ 3541 if (sizesset) { 3542 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3543 } 3544 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows); 3545 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3546 3547 /* determine ownership of all (block) rows */ 3548 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3549 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3550 else m = newMat->rmap->n; /* Set by user */ 3551 3552 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3553 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3554 3555 /* First process needs enough room for process with most rows */ 3556 if (!rank) { 3557 mmax = rowners[1]; 3558 for (i=2; i<=size; i++) { 3559 mmax = PetscMax(mmax, rowners[i]); 3560 } 3561 } else mmax = -1; /* unused, but compilers complain */ 3562 3563 rowners[0] = 0; 3564 for (i=2; i<=size; i++) { 3565 rowners[i] += 
rowners[i-1]; 3566 } 3567 rstart = rowners[rank]; 3568 rend = rowners[rank+1]; 3569 3570 /* distribute row lengths to all processors */ 3571 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3572 if (!rank) { 3573 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3574 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3575 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3576 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3577 for (j=0; j<m; j++) { 3578 procsnz[0] += ourlens[j]; 3579 } 3580 for (i=1; i<size; i++) { 3581 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3582 /* calculate the number of nonzeros on each processor */ 3583 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3584 procsnz[i] += rowlengths[j]; 3585 } 3586 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3587 } 3588 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3589 } else { 3590 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3591 } 3592 3593 if (!rank) { 3594 /* determine max buffer needed and allocate it */ 3595 maxnz = 0; 3596 for (i=0; i<size; i++) { 3597 maxnz = PetscMax(maxnz,procsnz[i]); 3598 } 3599 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3600 3601 /* read in my part of the matrix column indices */ 3602 nz = procsnz[0]; 3603 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3604 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3605 3606 /* read in everyone else's and ship off */ 3607 for (i=1; i<size; i++) { 3608 nz = procsnz[i]; 3609 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3610 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3611 } 3612 ierr = PetscFree(cols);CHKERRQ(ierr); 3613 } else { 3614 /* determine buffer space needed for message */ 3615 nz = 0; 3616 for (i=0; i<m; i++) { 3617 nz += ourlens[i]; 3618 } 3619 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3620 3621 /* receive message of column indices */ 3622 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3623 } 3624 3625 /* determine column ownership if matrix is not square */ 3626 if (N != M) { 3627 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3628 else n = newMat->cmap->n; 3629 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3630 cstart = cend - n; 3631 } else { 3632 cstart = rstart; 3633 cend = rend; 3634 n = cend - cstart; 3635 } 3636 3637 /* loop over local rows, determining number of off diagonal entries */ 3638 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3639 jj = 0; 3640 for (i=0; i<m; i++) { 3641 for (j=0; j<ourlens[i]; j++) { 3642 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3643 jj++; 3644 } 3645 } 3646 3647 for (i=0; i<m; i++) { 3648 ourlens[i] -= offlens[i]; 3649 } 3650 if (!sizesset) { 3651 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3652 } 3653 3654 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3655 3656 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3657 3658 for (i=0; i<m; i++) { 3659 ourlens[i] += offlens[i]; 3660 } 3661 3662 if (!rank) { 3663 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3664 3665 /* read in my part of the matrix numerical values */ 3666 nz = procsnz[0]; 3667 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3668 3669 /* insert into matrix */ 3670 jj = rstart; 3671 
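/* rows are inserted with global row numbers; MatSetValues_MPIAIJ() can be called directly because newMat is known to be of type MPIAIJ here */ 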
smycols = mycols; 3672 svals = vals; 3673 for (i=0; i<m; i++) { 3674 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3675 smycols += ourlens[i]; 3676 svals += ourlens[i]; 3677 jj++; 3678 } 3679 3680 /* read in other processors and ship out */ 3681 for (i=1; i<size; i++) { 3682 nz = procsnz[i]; 3683 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3684 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3685 } 3686 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3687 } else { 3688 /* receive numeric values */ 3689 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3690 3691 /* receive message of values */ 3692 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3693 3694 /* insert into matrix */ 3695 jj = rstart; 3696 smycols = mycols; 3697 svals = vals; 3698 for (i=0; i<m; i++) { 3699 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3700 smycols += ourlens[i]; 3701 svals += ourlens[i]; 3702 jj++; 3703 } 3704 } 3705 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3706 ierr = PetscFree(vals);CHKERRQ(ierr); 3707 ierr = PetscFree(mycols);CHKERRQ(ierr); 3708 ierr = PetscFree(rowners);CHKERRQ(ierr); 3709 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3710 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 PetscFunctionReturn(0); 3712 } 3713 3714 #undef __FUNCT__ 3715 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3716 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3717 { 3718 PetscErrorCode ierr; 3719 IS iscol_local; 3720 PetscInt csize; 3721 3722 PetscFunctionBegin; 3723 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3724 if (call == MAT_REUSE_MATRIX) { 3725 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3726 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3727 } else { 3728 PetscInt cbs; 3729 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3730 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3731 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3732 } 3733 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3734 if (call == MAT_INITIAL_MATRIX) { 3735 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3736 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3737 } 3738 PetscFunctionReturn(0); 3739 } 3740 3741 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3742 #undef __FUNCT__ 3743 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3744 /* 3745 Not great since it makes two copies of the submatrix: first a SeqAIJ locally, and then 3746 the final result by concatenating the local matrices. Writing it directly 3747 would be much like MatGetSubMatrices_MPIAIJ(). 3748 3749 Note: This requires a sequential iscol with all indices. 
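The csize argument is the number of columns of the resulting matrix to be stored on this process (PETSC_DECIDE lets the column layout be computed). 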
3750 */ 3751 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3752 { 3753 PetscErrorCode ierr; 3754 PetscMPIInt rank,size; 3755 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3756 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3757 PetscBool allcolumns, colflag; 3758 Mat M,Mreuse; 3759 MatScalar *vwork,*aa; 3760 MPI_Comm comm; 3761 Mat_SeqAIJ *aij; 3762 3763 PetscFunctionBegin; 3764 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3765 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3766 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3767 3768 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3769 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3770 if (colflag && ncol == mat->cmap->N) { 3771 allcolumns = PETSC_TRUE; 3772 } else { 3773 allcolumns = PETSC_FALSE; 3774 } 3775 if (call == MAT_REUSE_MATRIX) { 3776 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3777 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3778 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3779 } else { 3780 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3781 } 3782 3783 /* 3784 m - number of local rows 3785 n - number of columns (same on all processors) 3786 rstart - first row in new global matrix generated 3787 */ 3788 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3789 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3790 if (call == MAT_INITIAL_MATRIX) { 3791 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3792 ii = aij->i; 3793 jj = aij->j; 3794 3795 /* 3796 Determine the number of non-zeros in the diagonal and off-diagonal 3797 portions of the matrix in order to do correct preallocation 3798 */ 3799 3800 /* first get start and end of "diagonal" columns */ 3801 if (csize == PETSC_DECIDE) { 3802 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3803 if (mglobal == n) { /* square matrix */ 3804 nlocal = m; 3805 } else { 3806 nlocal = n/size + ((n % size) > rank); 3807 } 3808 } else { 3809 nlocal = csize; 3810 } 3811 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3812 rstart = rend - nlocal; 3813 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3814 3815 /* next, compute all the lengths */ 3816 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3817 olens = dlens + m; 3818 for (i=0; i<m; i++) { 3819 jend = ii[i+1] - ii[i]; 3820 olen = 0; 3821 dlen = 0; 3822 for (j=0; j<jend; j++) { 3823 if (*jj < rstart || *jj >= rend) olen++; 3824 else dlen++; 3825 jj++; 3826 } 3827 olens[i] = olen; 3828 dlens[i] = dlen; 3829 } 3830 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3831 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3832 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3833 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3834 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3835 ierr = PetscFree(dlens);CHKERRQ(ierr); 3836 } else { 3837 PetscInt ml,nl; 3838 3839 M = *newmat; 3840 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3841 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3842 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3843 /* 
3844 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3845 rather than the slower MatSetValues(). 3846 */ 3847 M->was_assembled = PETSC_TRUE; 3848 M->assembled = PETSC_FALSE; 3849 } 3850 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3851 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3852 ii = aij->i; 3853 jj = aij->j; 3854 aa = aij->a; 3855 for (i=0; i<m; i++) { 3856 row = rstart + i; 3857 nz = ii[i+1] - ii[i]; 3858 cwork = jj; jj += nz; 3859 vwork = aa; aa += nz; 3860 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3861 } 3862 3863 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3864 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 *newmat = M; 3866 3867 /* save submatrix used in processor for next request */ 3868 if (call == MAT_INITIAL_MATRIX) { 3869 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3870 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3871 } 3872 PetscFunctionReturn(0); 3873 } 3874 3875 #undef __FUNCT__ 3876 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3877 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3878 { 3879 PetscInt m,cstart, cend,j,nnz,i,d; 3880 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3881 const PetscInt *JJ; 3882 PetscScalar *values; 3883 PetscErrorCode ierr; 3884 3885 PetscFunctionBegin; 3886 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3887 3888 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3889 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3890 m = B->rmap->n; 3891 cstart = B->cmap->rstart; 3892 cend = B->cmap->rend; 3893 rstart = B->rmap->rstart; 3894 3895 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3896 3897 #if defined(PETSC_USE_DEBUG) 3898 for (i=0; i<m; i++) { 3899 nnz = Ii[i+1]- Ii[i]; 3900 JJ = J + Ii[i]; 3901 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3902 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3903 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3904 } 3905 #endif 3906 3907 for (i=0; i<m; i++) { 3908 nnz = Ii[i+1]- Ii[i]; 3909 JJ = J + Ii[i]; 3910 nnz_max = PetscMax(nnz_max,nnz); 3911 d = 0; 3912 for (j=0; j<nnz; j++) { 3913 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3914 } 3915 d_nnz[i] = d; 3916 o_nnz[i] = nnz - d; 3917 } 3918 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3919 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3920 3921 if (v) values = (PetscScalar*)v; 3922 else { 3923 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3924 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3925 } 3926 3927 for (i=0; i<m; i++) { 3928 ii = i + rstart; 3929 nnz = Ii[i+1]- Ii[i]; 3930 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3931 } 3932 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3933 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3934 3935 if (!v) { 3936 ierr = PetscFree(values);CHKERRQ(ierr); 3937 } 3938 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3939 PetscFunctionReturn(0); 3940 } 3941 3942 #undef __FUNCT__ 3943 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3944 /*@ 3945 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3946 (the default parallel PETSc format). 3947 3948 Collective on MPI_Comm 3949 3950 Input Parameters: 3951 + B - the matrix 3952 . i - the indices into j for the start of each local row (starts with zero) 3953 . j - the column indices for each local row (starts with zero) 3954 - v - optional values in the matrix 3955 3956 Level: developer 3957 3958 Notes: 3959 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3960 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3961 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3962 3963 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3964 3965 The format used for the sparse matrix input is equivalent to a 3966 row-major ordering, i.e., for the following matrix the expected input data is 3967 as shown: 3968 3969 1 0 0 3970 2 0 3 P0 3971 ------- 3972 4 5 6 P1 3973 3974 Process0 [P0]: rows_owned=[0,1] 3975 i = {0,1,3} [size = nrow+1 = 2+1] 3976 j = {0,0,2} [size = nz = 3] 3977 v = {1,2,3} [size = nz = 3] 3978 3979 Process1 [P1]: rows_owned=[2] 3980 i = {0,3} [size = nrow+1 = 1+1] 3981 j = {0,1,2} [size = nz = 3] 3982 v = {4,5,6} [size = nz = 3] 3983 3984 .keywords: matrix, aij, compressed row, sparse, parallel 3985 3986 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3987 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3988 @*/ 3989 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3990 { 3991 PetscErrorCode ierr; 3992 3993 PetscFunctionBegin; 3994 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3995 PetscFunctionReturn(0); 3996 } 3997 3998 #undef __FUNCT__ 3999 #define __FUNCT__ "MatMPIAIJSetPreallocation" 4000 /*@C 4001 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4002 (the default parallel PETSc format). For good matrix assembly performance 4003 the user should preallocate the matrix storage by setting the parameters 4004 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4005 performance can be increased by more than a factor of 50. 4006 4007 Collective on MPI_Comm 4008 4009 Input Parameters: 4010 + A - the matrix 4011 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4012 (same value is used for all local rows) 4013 . d_nnz - array containing the number of nonzeros in the various rows of the 4014 DIAGONAL portion of the local submatrix (possibly different for each row) 4015 or NULL, if d_nz is used to specify the nonzero structure. 4016 The size of this array is equal to the number of local rows, i.e., 'm'. 
4017 For matrices that will be factored, you must leave room for (and set) 4018 the diagonal entry even if it is zero. 4019 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4020 submatrix (same value is used for all local rows). 4021 - o_nnz - array containing the number of nonzeros in the various rows of the 4022 OFF-DIAGONAL portion of the local submatrix (possibly different for 4023 each row) or NULL, if o_nz is used to specify the nonzero 4024 structure. The size of this array is equal to the number 4025 of local rows, i.e., 'm'. 4026 4027 If the *_nnz parameter is given then the *_nz parameter is ignored. 4028 4029 The AIJ format (also called the Yale sparse matrix format or 4030 compressed row storage (CSR)) is fully compatible with standard Fortran 77 4031 storage. The stored row and column indices begin with zero. 4032 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 4033 4034 The parallel matrix is partitioned such that the first m0 rows belong to 4035 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4036 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 4037 4038 The DIAGONAL portion of the local submatrix of a processor can be defined 4039 as the submatrix which is obtained by extracting the part corresponding to 4040 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4041 first row that belongs to the processor, r2 is the last row belonging to 4042 this processor, and c1-c2 is the range of indices of the local part of a 4043 vector suitable for applying the matrix to. This is an m x n matrix. In the 4044 common case of a square matrix, the row and column ranges are the same and 4045 the DIAGONAL part is also square. The remaining portion of the local 4046 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 4047 4048 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 4049 4050 You can call MatGetInfo() to get information on how effective the preallocation was; 4051 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4052 You can also run with the option -info and look for messages with the string 4053 malloc in them to see if additional memory allocation was needed. 4054 4055 Example usage: 4056 4057 Consider the following 8x8 matrix with 34 non-zero values, that is 4058 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4059 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4060 as follows: 4061 4062 .vb 4063 1 2 0 | 0 3 0 | 0 4 4064 Proc0 0 5 6 | 7 0 0 | 8 0 4065 9 0 10 | 11 0 0 | 12 0 4066 ------------------------------------- 4067 13 0 14 | 15 16 17 | 0 0 4068 Proc1 0 18 0 | 19 20 21 | 0 0 4069 0 0 0 | 22 23 0 | 24 0 4070 ------------------------------------- 4071 Proc2 25 26 27 | 0 0 28 | 29 0 4072 30 0 0 | 31 32 33 | 0 34 4073 .ve 4074 4075 This can be represented as a collection of submatrices as: 4076 4077 .vb 4078 A B C 4079 D E F 4080 G H I 4081 .ve 4082 4083 Where the submatrices A,B,C are owned by proc0, D,E,F are 4084 owned by proc1, G,H,I are owned by proc2. 4085 4086 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4087 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4088 The 'M','N' parameters are 8,8, and have the same values on all procs. 4089 4090 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4091 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 4092 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4093 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4094 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4095 matrix, and [DF] as another SeqAIJ matrix. 4096 4097 When d_nz, o_nz parameters are specified, d_nz storage elements are 4098 allocated for every row of the local diagonal submatrix, and o_nz 4099 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4100 One way to choose d_nz and o_nz is to use the max nonzeros per local 4101 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4102 In this case, the values of d_nz,o_nz are: 4103 .vb 4104 proc0 : dnz = 2, o_nz = 2 4105 proc1 : dnz = 3, o_nz = 2 4106 proc2 : dnz = 1, o_nz = 4 4107 .ve 4108 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4109 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4110 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4111 34 values. 4112 4113 When d_nnz, o_nnz parameters are specified, the storage is specified 4114 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4115 In the above case the values for d_nnz,o_nnz are: 4116 .vb 4117 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4118 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4119 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4120 .ve 4121 Here the space allocated is the sum of all the above values, i.e., 34, and 4122 hence the preallocation is perfect. 4123 4124 Level: intermediate 4125 4126 .keywords: matrix, aij, compressed row, sparse, parallel 4127 4128 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4129 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4130 @*/ 4131 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4132 { 4133 PetscErrorCode ierr; 4134 4135 PetscFunctionBegin; 4136 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4137 PetscValidType(B,1); 4138 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4139 PetscFunctionReturn(0); 4140 } 4141 4142 #undef __FUNCT__ 4143 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4144 /*@ 4145 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4146 CSR format. 4147 4148 Collective on MPI_Comm 4149 4150 Input Parameters: 4151 + comm - MPI communicator 4152 . m - number of local rows (Cannot be PETSC_DECIDE) 4153 . n - This value should be the same as the local size used in creating the 4154 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4155 calculated if N is given) For square matrices n is almost always m. 4156 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4157 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4158 . i - row indices 4159 . j - column indices 4160 - a - matrix values 4161 4162 Output Parameter: 4163 . mat - the matrix 4164 4165 Level: intermediate 4166 4167 Notes: 4168 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4169 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4170 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
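A minimal calling sketch (the variable names are illustrative; i, j, and a hold this process's rows in the CSR layout described below):

.vb
   Mat A;
   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve
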
4171 4172 The i and j indices are 0 based, and the i indices are offsets into the local j array. 4173 4174 The format used for the sparse matrix input is equivalent to a 4175 row-major ordering, i.e., for the following matrix the expected input data is 4176 as shown: 4177 4178 1 0 0 4179 2 0 3 P0 4180 ------- 4181 4 5 6 P1 4182 4183 Process0 [P0]: rows_owned=[0,1] 4184 i = {0,1,3} [size = nrow+1 = 2+1] 4185 j = {0,0,2} [size = nz = 3] 4186 v = {1,2,3} [size = nz = 3] 4187 4188 Process1 [P1]: rows_owned=[2] 4189 i = {0,3} [size = nrow+1 = 1+1] 4190 j = {0,1,2} [size = nz = 3] 4191 v = {4,5,6} [size = nz = 3] 4192 4193 .keywords: matrix, aij, compressed row, sparse, parallel 4194 4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4196 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4197 @*/ 4198 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4199 { 4200 PetscErrorCode ierr; 4201 4202 PetscFunctionBegin; 4203 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4204 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4205 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4206 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4207 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4208 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4209 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4210 PetscFunctionReturn(0); 4211 } 4212 4213 #undef __FUNCT__ 4214 #define __FUNCT__ "MatCreateAIJ" 4215 /*@C 4216 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4217 (the default parallel PETSc format). For good matrix assembly performance 4218 the user should preallocate the matrix storage by setting the parameters 4219 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4220 performance can be increased by more than a factor of 50. 4221 4222 Collective on MPI_Comm 4223 4224 Input Parameters: 4225 + comm - MPI communicator 4226 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4227 This value should be the same as the local size used in creating the 4228 y vector for the matrix-vector product y = Ax. 4229 . n - This value should be the same as the local size used in creating the 4230 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4231 calculated if N is given) For square matrices n is almost always m. 4232 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4233 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4234 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4235 (same value is used for all local rows) 4236 . d_nnz - array containing the number of nonzeros in the various rows of the 4237 DIAGONAL portion of the local submatrix (possibly different for each row) 4238 or NULL, if d_nz is used to specify the nonzero structure. 4239 The size of this array is equal to the number of local rows, i.e., 'm'. 4240 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4241 submatrix (same value is used for all local rows). 
4242 - o_nnz - array containing the number of nonzeros in the various rows of the 4243 OFF-DIAGONAL portion of the local submatrix (possibly different for 4244 each row) or NULL, if o_nz is used to specify the nonzero 4245 structure. The size of this array is equal to the number 4246 of local rows, i.e., 'm'. 4247 4248 Output Parameter: 4249 . A - the matrix 4250 4251 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4252 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4253 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4254 4255 Notes: 4256 If the *_nnz parameter is given then the *_nz parameter is ignored 4257 4258 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4259 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4260 storage requirements for this matrix. 4261 4262 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4263 processor then it must be used on all processors that share the object for 4264 that argument. 4265 4266 The user MUST specify either the local or global matrix dimensions 4267 (possibly both). 4268 4269 The parallel matrix is partitioned across processors such that the 4270 first m0 rows belong to process 0, the next m1 rows belong to 4271 process 1, the next m2 rows belong to process 2, etc., where 4272 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores 4273 values corresponding to an [m x N] submatrix. 4274 4275 The columns are logically partitioned with the n0 columns belonging 4276 to the 0th partition, the next n1 columns belonging to the next 4277 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 4278 4279 The DIAGONAL portion of the local submatrix on any given processor 4280 is the submatrix corresponding to the rows and columns m,n 4281 corresponding to the given processor, i.e., the diagonal matrix on 4282 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4283 etc. The remaining portion of the local submatrix [m x (N-n)] 4284 constitutes the OFF-DIAGONAL portion. The example below better 4285 illustrates this concept. 4286 4287 For a square global matrix we define each processor's diagonal portion 4288 to be its local rows and the corresponding columns (a square submatrix); 4289 each processor's off-diagonal portion encompasses the remainder of the 4290 local matrix (a rectangular submatrix). 4291 4292 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 4293 4294 When calling this routine with a single process communicator, a matrix of 4295 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4296 type of communicator, use the construction mechanism: 4297 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4298 4299 By default, this format uses inodes (identical nodes) when possible. 4300 We search for consecutive rows with the same nonzero structure, thereby 4301 reusing matrix information to achieve increased efficiency. 4302 4303 Options Database Keys: 4304 + -mat_no_inode - Do not use inodes 4305 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4306 - -mat_aij_oneindex - Internally use indexing starting at 1 4307 rather than 0. Note that when calling MatSetValues(), 4308 the user still MUST index entries starting at 0! 4309 4310 4311 Example usage: 4312 4313 Consider the following 8x8 matrix with 34 non-zero values, that is 4314 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows, 4315 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4316 as follows: 4317 4318 .vb 4319 1 2 0 | 0 3 0 | 0 4 4320 Proc0 0 5 6 | 7 0 0 | 8 0 4321 9 0 10 | 11 0 0 | 12 0 4322 ------------------------------------- 4323 13 0 14 | 15 16 17 | 0 0 4324 Proc1 0 18 0 | 19 20 21 | 0 0 4325 0 0 0 | 22 23 0 | 24 0 4326 ------------------------------------- 4327 Proc2 25 26 27 | 0 0 28 | 29 0 4328 30 0 0 | 31 32 33 | 0 34 4329 .ve 4330 4331 This can be represented as a collection of submatrices as: 4332 4333 .vb 4334 A B C 4335 D E F 4336 G H I 4337 .ve 4338 4339 Where the submatrices A,B,C are owned by proc0, D,E,F are 4340 owned by proc1, G,H,I are owned by proc2. 4341 4342 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4343 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4344 The 'M','N' parameters are 8,8, and have the same values on all procs. 4345 4346 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4347 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4348 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4349 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4350 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4351 matrix, and [DF] as another SeqAIJ matrix. 4352 4353 When d_nz, o_nz parameters are specified, d_nz storage elements are 4354 allocated for every row of the local diagonal submatrix, and o_nz 4355 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4356 One way to choose d_nz and o_nz is to use the max nonzeros per local 4357 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4358 In this case, the values of d_nz,o_nz are: 4359 .vb 4360 proc0 : dnz = 2, o_nz = 2 4361 proc1 : dnz = 3, o_nz = 2 4362 proc2 : dnz = 1, o_nz = 4 4363 .ve 4364 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4365 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4366 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4367 34 values. 4368 4369 When d_nnz, o_nnz parameters are specified, the storage is specified 4370 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4371 In the above case the values for d_nnz,o_nnz are: 4372 .vb 4373 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4374 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4375 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4376 .ve 4377 Here the space allocated is the sum of all the above values, i.e., 34, and 4378 hence the preallocation is perfect. 
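A minimal calling sketch for the example above (run on 3 processes; m is 3, 3, or 2 depending on rank, and d_nnz/o_nnz are the per-process arrays listed above):

.vb
   Mat A;
   MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);
   /* ... MatSetValues() with global indices, then MatAssemblyBegin/End() ... */
.ve
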

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  if (n == PETSC_DECIDE) {
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* Check sum(n) = N */
  ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);

  ierr    = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
/*@
      MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH processor MUST be the same.
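
   A minimal calling sketch (Aloc is a hypothetical SeqAIJ matrix each process
   has already assembled; error checking elided):
.vb
     Mat Apar;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
.ve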

@*/
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (size == 1) {
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  } else {
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
    }
    ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N = mpimat->cmap->N,i,j,*owners,*ai = a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa = a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*-----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M = seqmat->rmap->n,N = seqmat->cmap->n,i,*owners,*ai = a->i,*aj = a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble = 0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space = NULL,current_space = NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*-------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*--------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*-------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
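
     A minimal calling sketch (seqA is a hypothetical SeqAIJ matrix of the same
     global size on every process; error checking elided):
.vb
       Mat mpiA;
       ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
.ve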
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows
          and putting them into a sequential matrix with mlocal rows and n columns, where mlocal is the
          row count obtained with MatGetLocalSize() and n is the global column count obtained with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated
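
   A minimal usage sketch (error checking elided; with MAT_INITIAL_MATRIX the
   caller destroys A_loc when done):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... use Aloc as an ordinary SeqAIJ matrix ... */
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve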

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a = (Mat_SeqAIJ*)(mpimat->A)->data,*b = (Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai = a->i,*aj = a->j,*bi = b->i,*bj = b->j,*cmap = mpimat->garray;
  MatScalar      *aa = a->a,*ba = b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am = A->rmap->n,i,j,k,cstart = A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated
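
   A minimal usage sketch (error checking elided; passing NULL for row and col
   extracts all local rows and the nonzero columns):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
.ve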

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated
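
   A minimal usage sketch (error checking elided; the returned rowb/colb can be
   passed back in with MAT_REUSE_MATRIX on later calls):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve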

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq = NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx = a->Mvctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag = ((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn = a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

             n                p                p
          (     )          (     )          (     )
        m (  A  )    *   n (  B  )    =   m (  C  )
          (     )          (     )          (     )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,n = B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MUMPS)
PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_CLIQUE)
PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
#endif

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CLIQUE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.
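
       For illustration only, here is a sketch of the split arrays for a
       2-process, 2x2 example with 1 row per process: each row holds one
       "diagonal" entry (local column index, per the use of the diagonal arrays
       below) and one "off-diagonal" entry (global column index, a reading of
       how this routine passes oj to MatCreateSeqAIJWithArrays()); rank is
       assumed to have been obtained with MPI_Comm_rank():
.vb
     PetscInt    ii[2] = {0,1},jj[1] = {0},oi[2] = {0,1},oj[1];
     PetscScalar aa[1] = {2.0},oa[1] = {-1.0};
     oj[0] = 1 - rank;   /* the column owned by the other process */
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,ii,jj,aa,oi,oj,oa,&A);CHKERRQ(ierr);
.ve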
/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a function that returns void */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   note that ba must be refreshed BEFORE ap2 is recomputed, since
                   MatDisAssemble_MPIAIJ() replaced B's value array */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
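
/*
   For orientation, a hypothetical sketch (not part of the library source) of the generic
   call sequence whose inner loop the Fortran shortcut above inlines: locally owned entries
   are written directly into the diagonal (A) and off-diagonal (B) blocks, while rows owned
   by other processes are stashed and communicated during assembly. The values of row, cols,
   and vals are placeholders.

     PetscInt    row     = 7;                      global row index, possibly off-process
     PetscInt    cols[3] = {5,7,9};                global column indices
     PetscScalar vals[3] = {-1.0,2.0,-1.0};

     ierr = MatSetValues(A,1,&row,3,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    stashed rows move here
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/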