#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ
*aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
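
   A minimal usage sketch (an illustration only; the names gseq and Adist, the
   size M, and the use of PetscSplitOwnership() to pick the local row count m
   are assumptions, not part of this file):

      Mat      gseq = NULL,Adist;
      PetscInt m = PETSC_DECIDE,M = 100;
      if (!rank) {
        assemble gseq as a MATSEQAIJ of global size M by M on process 0
      }
      ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
      ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);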
179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = 
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the 
  local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2)  low2 = 0;     \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
447 b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if 
(!aij->colmap) { 546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ 
"MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. 
*/ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 714 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 715 716 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 717 718 aij->rowvalues = 0; 719 720 /* used by MatAXPY() */ 721 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 722 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 723 724 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 725 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 732 { 733 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 734 PetscErrorCode ierr; 735 736 PetscFunctionBegin; 737 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 738 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatZeroRows_MPIAIJ" 744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 745 { 746 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 747 PetscErrorCode ierr; 748 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 749 PetscInt i,*owners = A->rmap->range; 750 PetscInt *nprocs,j,idx,nsends,row; 751 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 752 PetscInt *rvalues,count,base,slen,*source; 753 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 754 MPI_Comm comm; 755 MPI_Request *send_waits,*recv_waits; 756 MPI_Status recv_status,*send_status; 757 const PetscScalar *xx; 758 PetscScalar *bb; 759 #if defined(PETSC_DEBUG) 760 PetscBool found = PETSC_FALSE; 761 #endif 762 763 PetscFunctionBegin; 764 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 765 /* first count number of contributors to each processor */ 766 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 767 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 768 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 769 j = 0; 770 for (i=0; i<N; i++) { 771 if (lastidx > (idx = rows[i])) j = 0; 772 lastidx = idx; 773 for (; j<size; j++) { 774 if (idx >= owners[j] && idx < owners[j+1]) { 775 nprocs[2*j]++; 776 nprocs[2*j+1] = 1; 777 owner[i] = j; 778 #if defined(PETSC_DEBUG) 779 found = PETSC_TRUE; 780 #endif 781 break; 782 } 783 } 784 #if defined(PETSC_DEBUG) 785 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 786 found = PETSC_FALSE; 787 #endif 788 } 789 nsends = 0; 790 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 791 792 if (A->nooffproczerorows) { 793 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 794 nrecvs = nsends; 795 nmax = N; 796 } else { 797 /* inform other 
processors of number of messages and max length*/ 798 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 799 } 800 801 /* post receives: */ 802 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 803 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 804 for (i=0; i<nrecvs; i++) { 805 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 806 } 807 808 /* do sends: 809 1) starts[i] gives the starting index in svalues for stuff going to 810 the ith processor 811 */ 812 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 813 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 814 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 815 816 starts[0] = 0; 817 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 818 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 819 820 starts[0] = 0; 821 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 822 count = 0; 823 for (i=0; i<size; i++) { 824 if (nprocs[2*i+1]) { 825 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 826 } 827 } 828 ierr = PetscFree(starts);CHKERRQ(ierr); 829 830 base = owners[rank]; 831 832 /* wait on receives */ 833 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 834 count = nrecvs; slen = 0; 835 while (count) { 836 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 837 /* unpack receives into our local space */ 838 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 839 840 source[imdex] = recv_status.MPI_SOURCE; 841 lens[imdex] = n; 842 slen += n; 843 count--; 844 } 845 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 846 847 /* move the data into the send scatter */ 848 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 849 count = 0; 850 for (i=0; i<nrecvs; i++) { 851 values = rvalues + i*nmax; 852 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 853 } 854 ierr = PetscFree(rvalues);CHKERRQ(ierr); 855 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 856 ierr = PetscFree(owner);CHKERRQ(ierr); 857 ierr = PetscFree(nprocs);CHKERRQ(ierr); 858 859 /* fix right hand side if needed */ 860 if (x && b) { 861 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 862 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 863 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 864 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 865 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 866 } 867 /* 868 Zero the required rows. If the "diagonal block" of the matrix 869 is square and the user wishes to set the diagonal we use separate 870 code so that MatSetValues() is not called for each diagonal allocating 871 new memory, thus calling lots of mallocs and slowing things down. 
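
     A typical user-level call that reaches this routine looks like (a sketch
     only; A, x, b, nlocal and rows are placeholder names)

        ierr = MatZeroRows(A,nlocal,rows,1.0,x,b);CHKERRQ(ierr);

     which zeroes the listed global rows, puts 1.0 on their diagonal entries,
     and adjusts the right hand side b so the solution keeps the values in x.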
872 873 */ 874 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 875 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 876 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 877 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 878 } else if (diag != 0.0) { 879 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 880 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 881 for (i = 0; i < slen; i++) { 882 row = lrows[i] + rstart; 883 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 884 } 885 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 886 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 } else { 888 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 889 } 890 ierr = PetscFree(lrows);CHKERRQ(ierr); 891 892 /* wait on sends */ 893 if (nsends) { 894 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 895 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 896 ierr = PetscFree(send_status);CHKERRQ(ierr); 897 } 898 ierr = PetscFree(send_waits);CHKERRQ(ierr); 899 ierr = PetscFree(svalues);CHKERRQ(ierr); 900 PetscFunctionReturn(0); 901 } 902 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 905 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 906 { 907 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 908 PetscErrorCode ierr; 909 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 910 PetscInt i,*owners = A->rmap->range; 911 PetscInt *nprocs,j,idx,nsends; 912 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 913 PetscInt *rvalues,count,base,slen,*source; 914 PetscInt *lens,*lrows,*values,m; 915 MPI_Comm comm; 916 MPI_Request *send_waits,*recv_waits; 917 MPI_Status recv_status,*send_status; 918 const PetscScalar *xx; 919 PetscScalar *bb,*mask; 920 Vec xmask,lmask; 921 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 922 const PetscInt *aj, *ii,*ridx; 923 PetscScalar *aa; 924 #if defined(PETSC_DEBUG) 925 PetscBool found = PETSC_FALSE; 926 #endif 927 928 PetscFunctionBegin; 929 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 930 /* first count number of contributors to each processor */ 931 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 932 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 933 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 934 j = 0; 935 for (i=0; i<N; i++) { 936 if (lastidx > (idx = rows[i])) j = 0; 937 lastidx = idx; 938 for (; j<size; j++) { 939 if (idx >= owners[j] && idx < owners[j+1]) { 940 nprocs[2*j]++; 941 nprocs[2*j+1] = 1; 942 owner[i] = j; 943 #if defined(PETSC_DEBUG) 944 found = PETSC_TRUE; 945 #endif 946 break; 947 } 948 } 949 #if defined(PETSC_DEBUG) 950 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 951 found = PETSC_FALSE; 952 #endif 953 } 954 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 955 956 /* inform other processors of number of messages and max length*/ 957 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 958 959 /* post receives: */ 960 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 961 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 962 for (i=0; i<nrecvs; i++) { 963 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 964 } 965 966 /* do sends: 967 1) starts[i] gives the starting index in svalues for stuff going to 968 the ith processor 969 */ 970 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 971 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 972 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 973 974 starts[0] = 0; 975 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 976 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 977 978 starts[0] = 0; 979 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 980 count = 0; 981 for (i=0; i<size; i++) { 982 if (nprocs[2*i+1]) { 983 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 984 } 985 } 986 ierr = PetscFree(starts);CHKERRQ(ierr); 987 988 base = owners[rank]; 989 990 /* wait on receives */ 991 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 992 count = nrecvs; slen = 0; 993 while (count) { 994 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 995 /* unpack receives into our local space */ 996 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 997 998 source[imdex] = recv_status.MPI_SOURCE; 999 lens[imdex] = n; 1000 slen += n; 1001 count--; 1002 } 1003 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1004 1005 /* move the data into the send scatter */ 1006 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1007 count = 0; 1008 for (i=0; i<nrecvs; i++) { 1009 values = rvalues + i*nmax; 1010 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1011 } 1012 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1013 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1014 ierr = PetscFree(owner);CHKERRQ(ierr); 1015 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1016 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1017 1018 /* zero diagonal part of matrix */ 1019 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1020 1021 /* handle off diagonal part of matrix */ 1022 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1023 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1024 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1025 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1026 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1027 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1030 if (x) { 1031 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1034 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1035 } 1036 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1037 1038 /* remove zeroed rows of off diagonal matrix */ 1039 ii = aij->i; 1040 for (i=0; i<slen; i++) { 1041 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1042 } 1043 1044 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1045 if (aij->compressedrow.use) { 1046 m = aij->compressedrow.nrows; 1047 ii = aij->compressedrow.i; 
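    /* compressed row storage keeps only the rows of B that contain nonzeros:
       ii then has compressedrow.nrows+1 entries and rindex maps each stored
       row back to its true local row number */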
1048 ridx = aij->compressedrow.rindex; 1049 for (i=0; i<m; i++) { 1050 n = ii[i+1] - ii[i]; 1051 aj = aij->j + ii[i]; 1052 aa = aij->a + ii[i]; 1053 1054 for (j=0; j<n; j++) { 1055 if (PetscAbsScalar(mask[*aj])) { 1056 if (b) bb[*ridx] -= *aa*xx[*aj]; 1057 *aa = 0.0; 1058 } 1059 aa++; 1060 aj++; 1061 } 1062 ridx++; 1063 } 1064 } else { /* do not use compressed row format */ 1065 m = l->B->rmap->n; 1066 for (i=0; i<m; i++) { 1067 n = ii[i+1] - ii[i]; 1068 aj = aij->j + ii[i]; 1069 aa = aij->a + ii[i]; 1070 for (j=0; j<n; j++) { 1071 if (PetscAbsScalar(mask[*aj])) { 1072 if (b) bb[i] -= *aa*xx[*aj]; 1073 *aa = 0.0; 1074 } 1075 aa++; 1076 aj++; 1077 } 1078 } 1079 } 1080 if (x) { 1081 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1082 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1083 } 1084 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1085 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1086 ierr = PetscFree(lrows);CHKERRQ(ierr); 1087 1088 /* wait on sends */ 1089 if (nsends) { 1090 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1091 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1092 ierr = PetscFree(send_status);CHKERRQ(ierr); 1093 } 1094 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1095 ierr = PetscFree(svalues);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMult_MPIAIJ" 1101 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1102 { 1103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1104 PetscErrorCode ierr; 1105 PetscInt nt; 1106 1107 PetscFunctionBegin; 1108 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1109 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1110 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1111 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1112 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1119 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1120 { 1121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 1136 PetscFunctionBegin; 1137 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1139 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1147 { 1148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1149 PetscErrorCode ierr; 1150 PetscBool merged; 1151 1152 PetscFunctionBegin; 1153 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1154 /* do nondiagonal part */ 1155 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1156 if (!merged) { 1157 /* send it on its way */ 1158 
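    /* posting the reverse scatter before the local product below lets the
       off-process contributions travel while the local multiply proceeds */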
ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 /* do local part */ 1160 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1161 /* receive remote parts: note this assumes the values are not actually */ 1162 /* added in yy until the next line, */ 1163 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1164 } else { 1165 /* do local part */ 1166 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1167 /* send it on its way */ 1168 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1169 /* values actually were received in the Begin() but we need to call this nop */ 1170 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1171 } 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1177 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1178 { 1179 MPI_Comm comm; 1180 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1181 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1182 IS Me,Notme; 1183 PetscErrorCode ierr; 1184 PetscInt M,N,first,last,*notme,i; 1185 PetscMPIInt size; 1186 1187 PetscFunctionBegin; 1188 /* Easy test: symmetric diagonal block */ 1189 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1190 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1191 if (!*f) PetscFunctionReturn(0); 1192 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1193 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1194 if (size == 1) PetscFunctionReturn(0); 1195 1196 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1197 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1198 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1199 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1200 for (i=0; i<first; i++) notme[i] = i; 1201 for (i=last; i<M; i++) notme[i-last+first] = i; 1202 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1203 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1204 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1205 Aoff = Aoffs[0]; 1206 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1207 Boff = Boffs[0]; 1208 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1209 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1211 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1213 ierr = PetscFree(notme);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1219 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1220 { 1221 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1222 PetscErrorCode ierr; 1223 1224 PetscFunctionBegin; 1225 /* do nondiagonal part */ 1226 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1227 /* send it on its way */ 1228 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1229 /* do local part */ 1230 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1231 /* receive remote parts */ 1232 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 /* 1237 This only works correctly for square matrices where 
the subblock A->A is the 1238 diagonal block 1239 */ 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1242 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1243 { 1244 PetscErrorCode ierr; 1245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1246 1247 PetscFunctionBegin; 1248 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1249 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1250 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatScale_MPIAIJ" 1256 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1257 { 1258 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1259 PetscErrorCode ierr; 1260 1261 PetscFunctionBegin; 1262 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1263 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatDestroy_MPIAIJ" 1269 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1270 { 1271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 #if defined(PETSC_USE_LOG) 1276 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1277 #endif 1278 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1279 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1280 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1282 #if defined(PETSC_USE_CTABLE) 1283 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1284 #else 1285 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1286 #endif 1287 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1288 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1289 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1290 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1291 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1292 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1293 1294 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1308 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1309 { 1310 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1311 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1312 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1313 PetscErrorCode ierr; 1314 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1315 int fd; 1316 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1317 PetscInt nzmax,*column_indices,j,k,col,*garray = 
aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th process needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
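      /* each remote process first sends its nonzero count rnz, then the column indices themselves */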
1387 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1388 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1389 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1390 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1391 } 1392 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1393 } else { 1394 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1395 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1396 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1398 } 1399 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1400 1401 /* load up the local column values */ 1402 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1403 cnt = 0; 1404 for (i=0; i<mat->rmap->n; i++) { 1405 for (j=B->i[i]; j<B->i[i+1]; j++) { 1406 if (garray[B->j[j]] > cstart) break; 1407 column_values[cnt++] = B->a[j]; 1408 } 1409 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1410 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1411 } 1412 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1413 1414 /* store the column values to the file */ 1415 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1416 if (!rank) { 1417 MPI_Status status; 1418 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1419 for (i=1; i<size; i++) { 1420 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1421 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1422 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1423 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1424 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1425 } 1426 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1427 } else { 1428 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1429 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1430 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1432 } 1433 ierr = PetscFree(column_values);CHKERRQ(ierr); 1434 1435 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1436 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #include <petscdraw.h> 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1443 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1444 { 1445 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1446 PetscErrorCode ierr; 1447 PetscMPIInt rank = aij->rank,size = aij->size; 1448 PetscBool isdraw,iascii,isbinary; 1449 PetscViewer sviewer; 1450 PetscViewerFormat format; 1451 1452 PetscFunctionBegin; 1453 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1456 if (iascii) { 1457 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1458 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1459 MatInfo info; 1460 PetscBool inodes; 1461 1462 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1463 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1465 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1466 if (!inodes) { 1467 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1468 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1469 } else { 1470 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1471 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1472 } 1473 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1475 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1477 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1480 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1483 PetscInt inodecount,inodelimit,*inodes; 1484 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1485 if (inodes) { 1486 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1487 } else { 1488 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1489 } 1490 PetscFunctionReturn(0); 1491 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1492 PetscFunctionReturn(0); 1493 } 1494 } else if (isbinary) { 1495 if (size == 1) { 1496 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1497 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1498 } else { 1499 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } else if (isdraw) { 1503 PetscDraw draw; 1504 PetscBool isnull; 1505 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1506 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1507 } 1508 1509 if (size == 1) { 1510 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1511 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1512 } else { 1513 /* assemble the entire matrix onto first processor. 
*/ 1514 Mat A; 1515 Mat_SeqAIJ *Aloc; 1516 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1517 MatScalar *a; 1518 1519 if (mat->rmap->N > 1024) { 1520 PetscBool flg = PETSC_FALSE; 1521 1522 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1523 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1524 } 1525 1526 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1527 if (!rank) { 1528 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1529 } else { 1530 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1531 } 1532 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1533 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1534 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1535 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1536 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1537 1538 /* copy over the A part */ 1539 Aloc = (Mat_SeqAIJ*)aij->A->data; 1540 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1541 row = mat->rmap->rstart; 1542 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1543 for (i=0; i<m; i++) { 1544 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1545 row++; 1546 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1547 } 1548 aj = Aloc->j; 1549 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1550 1551 /* copy over the B part */ 1552 Aloc = (Mat_SeqAIJ*)aij->B->data; 1553 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1554 row = mat->rmap->rstart; 1555 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1556 ct = cols; 1557 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1558 for (i=0; i<m; i++) { 1559 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1560 row++; 1561 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1562 } 1563 ierr = PetscFree(ct);CHKERRQ(ierr); 1564 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1565 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 /* 1567 Everyone has to call to draw the matrix since the graphics waits are 1568 synchronized across all processors that share the PetscDraw object 1569 */ 1570 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1571 if (!rank) { 1572 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1573 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1574 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1575 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1576 } 1577 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1578 ierr = MatDestroy(&A);CHKERRQ(ierr); 1579 } 1580 PetscFunctionReturn(0); 1581 } 1582 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatView_MPIAIJ" 1585 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1586 { 1587 PetscErrorCode ierr; 1588 PetscBool iascii,isdraw,issocket,isbinary; 1589 1590 PetscFunctionBegin; 1591 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1592 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1595 if (iascii || isdraw || isbinary || issocket) { 1596 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1597 } 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatSOR_MPIAIJ" 1603 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1604 { 1605 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1606 PetscErrorCode ierr; 1607 Vec bb1 = 0; 1608 PetscBool hasop; 1609 1610 PetscFunctionBegin; 1611 if (flag == SOR_APPLY_UPPER) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1617 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1618 } 1619 1620 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1621 if (flag & SOR_ZERO_INITIAL_GUESS) { 1622 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1623 its--; 1624 } 1625 1626 while (its--) { 1627 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 1630 /* update rhs: bb1 = bb - B*x */ 1631 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1632 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1633 1634 /* local sweep */ 1635 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1636 } 1637 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1638 if (flag & SOR_ZERO_INITIAL_GUESS) { 1639 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1640 its--; 1641 } 1642 while (its--) { 1643 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 1646 /* update rhs: bb1 = bb - B*x */ 1647 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1648 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1649 1650 /* local sweep */ 1651 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1652 } 1653 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1654 if (flag & SOR_ZERO_INITIAL_GUESS) { 1655 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1656 its--; 1657 } 1658 while (its--) { 1659 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 1662 /* update rhs: bb1 = bb - B*x */ 1663 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1664 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1665 1666 /* local sweep */ 1667 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1668 } 1669 } else if (flag & SOR_EISENSTAT) { 1670 Vec xx1; 1671 1672 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1673 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1674 
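    /* Eisenstat's trick: xx now holds the result of the local backward sweep.
       The off-process entries of xx are gathered into lvec below so that the
       modified right-hand side bb1 = bb + ((omega-2)/omega)*D*xx + B*xx_off
       can be assembled before the concluding local forward sweep. */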
1675 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1676 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 if (!mat->diag) { 1678 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1679 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1680 } 1681 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1682 if (hasop) { 1683 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1684 } else { 1685 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1686 } 1687 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1688 1689 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1690 1691 /* local sweep */ 1692 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1693 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1694 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1695 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1696 1697 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatPermute_MPIAIJ" 1703 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1704 { 1705 Mat aA,aB,Aperm; 1706 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1707 PetscScalar *aa,*ba; 1708 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1709 PetscSF rowsf,sf; 1710 IS parcolp = NULL; 1711 PetscBool done; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1716 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1717 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1718 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1719 1720 /* Invert row permutation to find out where my rows should go */ 1721 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1722 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1723 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1724 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1725 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1726 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 1728 /* Invert column permutation to find out where my columns should go */ 1729 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1730 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1731 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1732 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1733 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1734 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1736 1737 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1738 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1739 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1740 1741 /* Find out where my gcols should go */ 1742 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1743 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1744 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1745 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1746 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1747 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1748 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1750 1751 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1752 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1753 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1756 for (i=0; i<m; i++) { 1757 PetscInt row = rdest[i],rowner; 1758 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1759 for (j=ai[i]; j<ai[i+1]; j++) { 1760 PetscInt cowner,col = cdest[aj[j]]; 1761 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1762 if (rowner == cowner) dnnz[i]++; 1763 else onnz[i]++; 1764 } 1765 for (j=bi[i]; j<bi[i+1]; j++) { 1766 PetscInt cowner,col = gcdest[bj[j]]; 1767 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1768 if (rowner == cowner) dnnz[i]++; 1769 else onnz[i]++; 1770 } 1771 } 1772 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1773 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1775 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1779 1780 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1781 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1783 for (i=0; i<m; i++) { 1784 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1785 PetscInt rowlen; 1786 rowlen = ai[i+1] - ai[i]; 1787 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1788 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1789 rowlen = bi[i+1] - bi[i]; 1790 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1791 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1792 } 1793 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1794 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1797 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1799 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1800 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1801 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1802 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1803 *B = Aperm; 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1809 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1810 { 1811 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1812 Mat A = mat->A,B = mat->B; 1813 PetscErrorCode ierr; 1814 PetscReal isend[5],irecv[5]; 1815 1816 PetscFunctionBegin; 1817 
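  /* Gather the counters from the diagonal (A) and off-diagonal (B) blocks into
     isend[], then either report them as-is (MAT_LOCAL) or reduce them with
     MAX/SUM over the matrix communicator.  A caller-side sketch (illustrative
     only, using the public MatGetInfo() interface):

       MatInfo info;
       ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"nz used %g, mallocs %g\n",
                          info.nz_used,info.mallocs);CHKERRQ(ierr);
  */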
info->block_size = 1.0; 1818 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1819 1820 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1821 isend[3] = info->memory; isend[4] = info->mallocs; 1822 1823 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1824 1825 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1826 isend[3] += info->memory; isend[4] += info->mallocs; 1827 if (flag == MAT_LOCAL) { 1828 info->nz_used = isend[0]; 1829 info->nz_allocated = isend[1]; 1830 info->nz_unneeded = isend[2]; 1831 info->memory = isend[3]; 1832 info->mallocs = isend[4]; 1833 } else if (flag == MAT_GLOBAL_MAX) { 1834 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1835 1836 info->nz_used = irecv[0]; 1837 info->nz_allocated = irecv[1]; 1838 info->nz_unneeded = irecv[2]; 1839 info->memory = irecv[3]; 1840 info->mallocs = irecv[4]; 1841 } else if (flag == MAT_GLOBAL_SUM) { 1842 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1843 1844 info->nz_used = irecv[0]; 1845 info->nz_allocated = irecv[1]; 1846 info->nz_unneeded = irecv[2]; 1847 info->memory = irecv[3]; 1848 info->mallocs = irecv[4]; 1849 } 1850 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1851 info->fill_ratio_needed = 0; 1852 info->factor_mallocs = 0; 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatSetOption_MPIAIJ" 1858 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1859 { 1860 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1861 PetscErrorCode ierr; 1862 1863 PetscFunctionBegin; 1864 switch (op) { 1865 case MAT_NEW_NONZERO_LOCATIONS: 1866 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1867 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1868 case MAT_KEEP_NONZERO_PATTERN: 1869 case MAT_NEW_NONZERO_LOCATION_ERR: 1870 case MAT_USE_INODES: 1871 case MAT_IGNORE_ZERO_ENTRIES: 1872 MatCheckPreallocated(A,1); 1873 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1874 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1875 break; 1876 case MAT_ROW_ORIENTED: 1877 a->roworiented = flg; 1878 1879 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1880 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1881 break; 1882 case MAT_NEW_DIAGONALS: 1883 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1884 break; 1885 case MAT_IGNORE_OFF_PROC_ENTRIES: 1886 a->donotstash = flg; 1887 break; 1888 case MAT_SPD: 1889 A->spd_set = PETSC_TRUE; 1890 A->spd = flg; 1891 if (flg) { 1892 A->symmetric = PETSC_TRUE; 1893 A->structurally_symmetric = PETSC_TRUE; 1894 A->symmetric_set = PETSC_TRUE; 1895 A->structurally_symmetric_set = PETSC_TRUE; 1896 } 1897 break; 1898 case MAT_SYMMETRIC: 1899 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1900 break; 1901 case MAT_STRUCTURALLY_SYMMETRIC: 1902 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1903 break; 1904 case MAT_HERMITIAN: 1905 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1906 break; 1907 case MAT_SYMMETRY_ETERNAL: 1908 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1909 break; 1910 default: 1911 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1912 } 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatGetRow_MPIAIJ" 1918 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1919 { 1920 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1921 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1922 PetscErrorCode ierr; 1923 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1924 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1925 PetscInt *cmap,*idx_p; 1926 1927 PetscFunctionBegin; 1928 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1929 mat->getrowactive = PETSC_TRUE; 1930 1931 if (!mat->rowvalues && (idx || v)) { 1932 /* 1933 allocate enough space to hold information from the longest row. 1934 */ 1935 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1936 PetscInt max = 1,tmp; 1937 for (i=0; i<matin->rmap->n; i++) { 1938 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1939 if (max < tmp) max = tmp; 1940 } 1941 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1942 } 1943 1944 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1945 lrow = row - rstart; 1946 1947 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1948 if (!v) {pvA = 0; pvB = 0;} 1949 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1950 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1951 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1952 nztot = nzA + nzB; 1953 1954 cmap = mat->garray; 1955 if (v || idx) { 1956 if (nztot) { 1957 /* Sort by increasing column numbers, assuming A and B already sorted */ 1958 PetscInt imark = -1; 1959 if (v) { 1960 *v = v_p = mat->rowvalues; 1961 for (i=0; i<nzB; i++) { 1962 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1963 else break; 1964 } 1965 imark = i; 1966 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1967 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1968 } 1969 if (idx) { 1970 *idx = idx_p = mat->rowindices; 1971 if (imark > -1) { 1972 for (i=0; i<imark; i++) { 1973 idx_p[i] = cmap[cworkB[i]]; 1974 } 1975 } else { 1976 for (i=0; i<nzB; i++) { 1977 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1978 else break; 1979 } 1980 imark = i; 1981 } 1982 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1983 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1984 } 1985 } else { 1986 if (idx) *idx = 0; 1987 if (v) *v = 0; 1988 } 1989 } 1990 *nz = nztot; 1991 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1992 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1998 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1999 { 2000 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2001 2002 PetscFunctionBegin; 2003 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2004 aij->getrowactive = PETSC_FALSE; 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatNorm_MPIAIJ" 2010 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2011 { 2012 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2013 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2014 PetscErrorCode ierr; 2015 PetscInt i,j,cstart = mat->cmap->rstart; 2016 PetscReal sum = 0.0; 2017 MatScalar *v; 2018 2019 PetscFunctionBegin; 2020 if (aij->size == 1) { 2021 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2022 } else { 2023 if (type == NORM_FROBENIUS) { 2024 v = 
amat->a; 2025 for (i=0; i<amat->nz; i++) { 2026 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2027 } 2028 v = bmat->a; 2029 for (i=0; i<bmat->nz; i++) { 2030 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2031 } 2032 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2033 *norm = PetscSqrtReal(*norm); 2034 } else if (type == NORM_1) { /* max column norm */ 2035 PetscReal *tmp,*tmp2; 2036 PetscInt *jj,*garray = aij->garray; 2037 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2039 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2040 *norm = 0.0; 2041 v = amat->a; jj = amat->j; 2042 for (j=0; j<amat->nz; j++) { 2043 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2044 } 2045 v = bmat->a; jj = bmat->j; 2046 for (j=0; j<bmat->nz; j++) { 2047 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2048 } 2049 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2050 for (j=0; j<mat->cmap->N; j++) { 2051 if (tmp2[j] > *norm) *norm = tmp2[j]; 2052 } 2053 ierr = PetscFree(tmp);CHKERRQ(ierr); 2054 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2055 } else if (type == NORM_INFINITY) { /* max row norm */ 2056 PetscReal ntemp = 0.0; 2057 for (j=0; j<aij->A->rmap->n; j++) { 2058 v = amat->a + amat->i[j]; 2059 sum = 0.0; 2060 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2061 sum += PetscAbsScalar(*v); v++; 2062 } 2063 v = bmat->a + bmat->i[j]; 2064 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2065 sum += PetscAbsScalar(*v); v++; 2066 } 2067 if (sum > ntemp) ntemp = sum; 2068 } 2069 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2070 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatTranspose_MPIAIJ" 2077 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2078 { 2079 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2080 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2081 PetscErrorCode ierr; 2082 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2083 PetscInt cstart = A->cmap->rstart,ncol; 2084 Mat B; 2085 MatScalar *array; 2086 2087 PetscFunctionBegin; 2088 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2089 2090 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2091 ai = Aloc->i; aj = Aloc->j; 2092 bi = Bloc->i; bj = Bloc->j; 2093 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2094 PetscInt *d_nnz,*g_nnz,*o_nnz; 2095 PetscSFNode *oloc; 2096 PETSC_UNUSED PetscSF sf; 2097 2098 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2099 /* compute d_nnz for preallocation */ 2100 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2101 for (i=0; i<ai[ma]; i++) { 2102 d_nnz[aj[i]]++; 2103 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2104 } 2105 /* compute local off-diagonal contributions */ 2106 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2107 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2108 /* map those to global */ 2109 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2110 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2111 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2112 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2113 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2114 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2116 2117 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2118 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2119 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2120 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2121 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2122 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2123 } else { 2124 B = *matout; 2125 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2126 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2127 } 2128 2129 /* copy over the A part */ 2130 array = Aloc->a; 2131 row = A->rmap->rstart; 2132 for (i=0; i<ma; i++) { 2133 ncol = ai[i+1]-ai[i]; 2134 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2135 row++; 2136 array += ncol; aj += ncol; 2137 } 2138 aj = Aloc->j; 2139 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2140 2141 /* copy over the B part */ 2142 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2143 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2144 array = Bloc->a; 2145 row = A->rmap->rstart; 2146 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2147 cols_tmp = cols; 2148 for (i=0; i<mb; i++) { 2149 ncol = bi[i+1]-bi[i]; 2150 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2151 row++; 2152 array += ncol; cols_tmp += ncol; 2153 } 2154 ierr = PetscFree(cols);CHKERRQ(ierr); 2155 2156 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2159 *matout = B; 2160 } else { 2161 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2168 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2169 { 2170 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2171 Mat a = aij->A,b = aij->B; 2172 PetscErrorCode ierr; 2173 PetscInt s1,s2,s3; 2174 2175 PetscFunctionBegin; 2176 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2177 if (rr) { 2178 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2179 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2180 /* Overlap communication with computation. 
*/ 2181 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2182 } 2183 if (ll) { 2184 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2185 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2186 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2187 } 2188 /* scale the diagonal block */ 2189 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2190 2191 if (rr) { 2192 /* Do a scatter end and then right scale the off-diagonal block */ 2193 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2194 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2201 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2208 PetscFunctionReturn(0); 2209 } 2210 2211 #undef __FUNCT__ 2212 #define __FUNCT__ "MatEqual_MPIAIJ" 2213 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2214 { 2215 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2216 Mat a,b,c,d; 2217 PetscBool flg; 2218 PetscErrorCode ierr; 2219 2220 PetscFunctionBegin; 2221 a = matA->A; b = matA->B; 2222 c = matB->A; d = matB->B; 2223 2224 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2225 if (flg) { 2226 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2227 } 2228 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatCopy_MPIAIJ" 2234 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2235 { 2236 PetscErrorCode ierr; 2237 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2238 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2239 2240 PetscFunctionBegin; 2241 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2242 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2243 /* because of the column compression in the off-processor part of the matrix a->B, 2244 the number of columns in a->B and b->B may be different, hence we cannot call 2245 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2246 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2247 then copying the submatrices */ 2248 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2249 } else { 2250 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2251 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatSetUp_MPIAIJ" 2258 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2259 { 2260 PetscErrorCode ierr; 2261 2262 PetscFunctionBegin; 2263 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2269 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2270 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2271 { 2272 PetscInt i,m=Y->rmap->N; 2273 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2274 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2275 const PetscInt *xi = x->i,*yi = y->i; 2276 2277 PetscFunctionBegin; 2278 /* Set the number of nonzeros in the new matrix */ 2279 for (i=0; i<m; i++) { 2280 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2281 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2282 nnz[i] = 0; 2283 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2284 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2285 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2286 nnz[i]++; 2287 } 2288 for (; k<nzy; k++) nnz[i]++; 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatAXPY_MPIAIJ" 2295 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2296 { 2297 PetscErrorCode ierr; 2298 PetscInt i; 2299 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2300 PetscBLASInt bnz,one=1; 2301 Mat_SeqAIJ *x,*y; 2302 2303 PetscFunctionBegin; 2304 if (str == SAME_NONZERO_PATTERN) { 2305 PetscScalar alpha = a; 2306 x = (Mat_SeqAIJ*)xx->A->data; 2307 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2308 y = (Mat_SeqAIJ*)yy->A->data; 2309 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2310 x = (Mat_SeqAIJ*)xx->B->data; 2311 y = (Mat_SeqAIJ*)yy->B->data; 2312 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2313 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2314 } else if (str == SUBSET_NONZERO_PATTERN) { 2315 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2316 2317 x = (Mat_SeqAIJ*)xx->B->data; 2318 y = (Mat_SeqAIJ*)yy->B->data; 2319 if (y->xtoy && y->XtoY != xx->B) { 2320 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2321 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2322 } 2323 if (!y->xtoy) { /* get xtoy */ 2324 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2325 y->XtoY = xx->B; 2326 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2327 } 2328 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2329 } else { 2330 Mat B; 2331 PetscInt *nnz_d,*nnz_o; 2332 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2333 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2334 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2335 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2336 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2337 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2338 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2339 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2341 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2342 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2343 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2344 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2346 } 2347 PetscFunctionReturn(0); 2348 } 2349 2350 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2351 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "MatConjugate_MPIAIJ" 2354 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2355 { 2356 #if defined(PETSC_USE_COMPLEX) 2357 PetscErrorCode ierr; 2358 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2359 2360 PetscFunctionBegin; 2361 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2362 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2363 #else 2364 PetscFunctionBegin; 2365 #endif 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatRealPart_MPIAIJ" 2371 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2372 { 2373 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2374 PetscErrorCode ierr; 2375 2376 PetscFunctionBegin; 2377 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2378 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2384 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2385 { 2386 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2387 PetscErrorCode ierr; 2388 2389 PetscFunctionBegin; 2390 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2391 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #if defined(PETSC_HAVE_PBGL) 2396 2397 #include <boost/parallel/mpi/bsp_process_group.hpp> 2398 #include <boost/graph/distributed/ilu_default_graph.hpp> 2399 #include <boost/graph/distributed/ilu_0_block.hpp> 2400 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2401 #include <boost/graph/distributed/petsc/interface.hpp> 2402 #include <boost/multi_array.hpp> 2403 #include <boost/parallel/distributed_property_map->hpp> 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2407 /* 2408 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2409 */ 2410 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2411 { 2412 namespace petsc = boost::distributed::petsc; 2413 2414 namespace graph_dist = boost::graph::distributed; 2415 using boost::graph::distributed::ilu_default::process_group_type; 2416 using boost::graph::ilu_permuted; 2417 2418 PetscBool row_identity, col_identity; 2419 PetscContainer c; 2420 PetscInt m, n, M, N; 2421 PetscErrorCode ierr; 2422 2423 PetscFunctionBegin; 2424 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2425 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2426 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2427 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2428 2429 process_group_type pg; 2430 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2431 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2432 lgraph_type& level_graph = *lgraph_p; 2433 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2434 2435 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2436 ilu_permuted(level_graph); 2437 2438 /* put together the new matrix */ 2439 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2440 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2441 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2442 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2443 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2444 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2445 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2446 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 2448 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2449 ierr = PetscContainerSetPointer(c, lgraph_p); 2450 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2451 ierr = PetscContainerDestroy(&c); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 #undef __FUNCT__ 2456 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2457 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2458 { 2459 PetscFunctionBegin; 2460 PetscFunctionReturn(0); 2461 } 2462 2463 #undef __FUNCT__ 2464 #define __FUNCT__ "MatSolve_MPIAIJ" 2465 /* 2466 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2467 */ 2468 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2469 { 2470 namespace graph_dist = boost::graph::distributed; 2471 2472 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2473 lgraph_type *lgraph_p; 2474 PetscContainer c; 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2479 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2480 ierr = VecCopy(b, x);CHKERRQ(ierr); 2481 2482 PetscScalar *array_x; 2483 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2484 PetscInt sx; 2485 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2486 2487 PetscScalar *array_b; 2488 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2489 PetscInt sb; 2490 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2491 2492 lgraph_type& level_graph = *lgraph_p; 2493 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2494 2495 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2496 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2497 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2498 2499 typedef boost::iterator_property_map<array_ref_type::iterator, 2500 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2501 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2502 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2503 2504 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2505 PetscFunctionReturn(0); 2506 } 2507 #endif 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "MatDestroy_MatRedundant" 2511 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2512 { 2513 PetscErrorCode ierr; 2514 Mat_Redundant *redund; 2515 PetscInt i; 2516 PetscMPIInt size; 2517 2518 PetscFunctionBegin; 2519 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2520 if (size == 1) { 2521 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2522 redund = 
a->redundant; 2523 } else { 2524 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2525 redund = a->redundant; 2526 } 2527 if (redund){ 2528 if (redund->matseq) { /* via MatGetSubMatrices() */ 2529 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2530 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2531 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2532 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2533 } else { 2534 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2535 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2536 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2537 for (i=0; i<redund->nrecvs; i++) { 2538 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2539 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2540 } 2541 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2542 } 2543 2544 if (redund->psubcomm) { 2545 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2546 } 2547 ierr = redund->Destroy(A);CHKERRQ(ierr); 2548 ierr = PetscFree(redund);CHKERRQ(ierr); 2549 } 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2555 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2556 { 2557 PetscMPIInt rank,size; 2558 MPI_Comm comm; 2559 PetscErrorCode ierr; 2560 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2561 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2562 PetscInt *rowrange = mat->rmap->range; 2563 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2564 Mat A = aij->A,B=aij->B,C=*matredundant; 2565 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2566 PetscScalar *sbuf_a; 2567 PetscInt nzlocal=a->nz+b->nz; 2568 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2569 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2570 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2571 MatScalar *aworkA,*aworkB; 2572 PetscScalar *vals; 2573 PetscMPIInt tag1,tag2,tag3,imdex; 2574 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2575 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2576 MPI_Status recv_status,*send_status; 2577 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2578 PetscInt **rbuf_j=NULL; 2579 PetscScalar **rbuf_a=NULL; 2580 Mat_Redundant *redund =NULL; 2581 2582 PetscFunctionBegin; 2583 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2584 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2585 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2586 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2587 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2588 2589 if (reuse == MAT_REUSE_MATRIX) { 2590 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2591 if (subsize == 1) { 2592 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2593 redund = c->redundant; 2594 } else { 2595 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2596 redund = c->redundant; 2597 } 2598 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2599 2600 nsends = redund->nsends; 2601 nrecvs = redund->nrecvs; 2602 send_rank = redund->send_rank; 2603 recv_rank = redund->recv_rank; 2604 sbuf_nz = redund->sbuf_nz; 2605 rbuf_nz = redund->rbuf_nz; 2606 sbuf_j = redund->sbuf_j; 2607 sbuf_a = redund->sbuf_a; 2608 rbuf_j = redund->rbuf_j; 2609 rbuf_a = redund->rbuf_a; 2610 } 2611 2612 if (reuse == MAT_INITIAL_MATRIX) { 2613 PetscInt nleftover,np_subcomm; 2614 2615 /* get the destination processors' id send_rank, nsends and nrecvs */ 2616 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2617 2618 np_subcomm = size/nsubcomm; 2619 nleftover = size - nsubcomm*np_subcomm; 2620 2621 /* block of codes below is specific for INTERLACED */ 2622 /* ------------------------------------------------*/ 2623 nsends = 0; nrecvs = 0; 2624 for (i=0; i<size; i++) { 2625 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2626 send_rank[nsends++] = i; 2627 recv_rank[nrecvs++] = i; 2628 } 2629 } 2630 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2631 i = size-nleftover-1; 2632 j = 0; 2633 while (j < nsubcomm - nleftover) { 2634 send_rank[nsends++] = i; 2635 i--; j++; 2636 } 2637 } 2638 2639 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2640 for (i=0; i<nleftover; i++) { 2641 recv_rank[nrecvs++] = size-nleftover+i; 2642 } 2643 } 2644 /*----------------------------------------------*/ 2645 2646 /* allocate sbuf_j, sbuf_a */ 2647 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2648 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2649 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2650 /* 2651 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2652 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 2653 */ 2654 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2655 2656 /* copy mat's local entries into the buffers */ 2657 if (reuse == MAT_INITIAL_MATRIX) { 2658 rownz_max = 0; 2659 rptr = sbuf_j; 2660 cols = sbuf_j + rend-rstart + 1; 2661 vals = sbuf_a; 2662 rptr[0] = 0; 2663 for (i=0; i<rend-rstart; i++) { 2664 row = i + rstart; 2665 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2666 ncols = nzA + nzB; 2667 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2668 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2669 /* load the column indices for this row into cols */ 2670 lwrite = 0; 2671 for (l=0; l<nzB; l++) { 2672 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2673 vals[lwrite] = aworkB[l]; 2674 cols[lwrite++] = ctmp; 2675 } 2676 } 2677 for (l=0; l<nzA; l++) { 2678 vals[lwrite] = aworkA[l]; 2679 cols[lwrite++] = cstart + cworkA[l]; 2680 } 2681 for (l=0; l<nzB; l++) { 2682 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2683 vals[lwrite] = aworkB[l]; 2684 cols[lwrite++] = ctmp; 2685 } 2686 } 2687 vals += ncols; 2688 cols += ncols; 2689 rptr[i+1] = rptr[i] + ncols; 2690 if (rownz_max < ncols) rownz_max = ncols; 2691 } 2692 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2693 } else { /* only copy matrix values into sbuf_a */ 2694 rptr = sbuf_j; 2695 vals = sbuf_a; 2696 rptr[0] = 0; 2697 for (i=0; i<rend-rstart; i++) { 2698 row = i + rstart; 2699 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2700 ncols = nzA + nzB; 2701 cworkB = b->j + b->i[i]; 2702 aworkA = a->a + a->i[i]; 2703 aworkB = 
b->a + b->i[i]; 2704 lwrite = 0; 2705 for (l=0; l<nzB; l++) { 2706 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2707 } 2708 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2709 for (l=0; l<nzB; l++) { 2710 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2711 } 2712 vals += ncols; 2713 rptr[i+1] = rptr[i] + ncols; 2714 } 2715 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2716 2717 /* send nzlocal to others, and recv other's nzlocal */ 2718 /*--------------------------------------------------*/ 2719 if (reuse == MAT_INITIAL_MATRIX) { 2720 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2721 2722 s_waits2 = s_waits3 + nsends; 2723 s_waits1 = s_waits2 + nsends; 2724 r_waits1 = s_waits1 + nsends; 2725 r_waits2 = r_waits1 + nrecvs; 2726 r_waits3 = r_waits2 + nrecvs; 2727 } else { 2728 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2729 2730 r_waits3 = s_waits3 + nsends; 2731 } 2732 2733 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2734 if (reuse == MAT_INITIAL_MATRIX) { 2735 /* get new tags to keep the communication clean */ 2736 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2737 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2738 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2739 2740 /* post receives of other's nzlocal */ 2741 for (i=0; i<nrecvs; i++) { 2742 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2743 } 2744 /* send nzlocal to others */ 2745 for (i=0; i<nsends; i++) { 2746 sbuf_nz[i] = nzlocal; 2747 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2748 } 2749 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2750 count = nrecvs; 2751 while (count) { 2752 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2753 2754 recv_rank[imdex] = recv_status.MPI_SOURCE; 2755 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2756 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2757 2758 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2759 2760 rbuf_nz[imdex] += i + 2; 2761 2762 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2763 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2764 count--; 2765 } 2766 /* wait on sends of nzlocal */ 2767 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2768 /* send mat->i,j to others, and recv from other's */ 2769 /*------------------------------------------------*/ 2770 for (i=0; i<nsends; i++) { 2771 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2772 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2773 } 2774 /* wait on receives of mat->i,j */ 2775 /*------------------------------*/ 2776 count = nrecvs; 2777 while (count) { 2778 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2779 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2780 count--; 2781 } 2782 /* wait on sends of mat->i,j */ 2783 
/*---------------------------*/ 2784 if (nsends) { 2785 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2786 } 2787 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2788 2789 /* post receives, send and receive mat->a */ 2790 /*----------------------------------------*/ 2791 for (imdex=0; imdex<nrecvs; imdex++) { 2792 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2793 } 2794 for (i=0; i<nsends; i++) { 2795 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2796 } 2797 count = nrecvs; 2798 while (count) { 2799 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2800 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2801 count--; 2802 } 2803 if (nsends) { 2804 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2805 } 2806 2807 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2808 2809 /* create redundant matrix */ 2810 /*-------------------------*/ 2811 if (reuse == MAT_INITIAL_MATRIX) { 2812 const PetscInt *range; 2813 PetscInt rstart_sub,rend_sub,mloc_sub; 2814 2815 /* compute rownz_max for preallocation */ 2816 for (imdex=0; imdex<nrecvs; imdex++) { 2817 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2818 rptr = rbuf_j[imdex]; 2819 for (i=0; i<j; i++) { 2820 ncols = rptr[i+1] - rptr[i]; 2821 if (rownz_max < ncols) rownz_max = ncols; 2822 } 2823 } 2824 2825 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2826 2827 /* get local size of redundant matrix 2828 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2829 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2830 rstart_sub = range[nsubcomm*subrank]; 2831 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2832 rend_sub = range[nsubcomm*(subrank+1)]; 2833 } else { 2834 rend_sub = mat->rmap->N; 2835 } 2836 mloc_sub = rend_sub - rstart_sub; 2837 2838 if (M == N) { 2839 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2840 } else { /* non-square matrix */ 2841 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2842 } 2843 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2844 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2845 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2846 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2847 } else { 2848 C = *matredundant; 2849 } 2850 2851 /* insert local matrix entries */ 2852 rptr = sbuf_j; 2853 cols = sbuf_j + rend-rstart + 1; 2854 vals = sbuf_a; 2855 for (i=0; i<rend-rstart; i++) { 2856 row = i + rstart; 2857 ncols = rptr[i+1] - rptr[i]; 2858 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2859 vals += ncols; 2860 cols += ncols; 2861 } 2862 /* insert received matrix entries */ 2863 for (imdex=0; imdex<nrecvs; imdex++) { 2864 rstart = rowrange[recv_rank[imdex]]; 2865 rend = rowrange[recv_rank[imdex]+1]; 2866 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2867 rptr = rbuf_j[imdex]; 2868 cols = rbuf_j[imdex] + rend-rstart + 1; 2869 vals = rbuf_a[imdex]; 2870 for (i=0; i<rend-rstart; i++) { 2871 row = i + rstart; 2872 ncols = rptr[i+1] - rptr[i]; 2873 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2874 vals += ncols; 2875 cols += ncols; 2876 } 2877 } 2878 ierr = 
MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 2881 if (reuse == MAT_INITIAL_MATRIX) { 2882 *matredundant = C; 2883 2884 /* create a supporting struct and attach it to C for reuse */ 2885 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2886 if (subsize == 1) { 2887 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2888 c->redundant = redund; 2889 } else { 2890 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2891 c->redundant = redund; 2892 } 2893 2894 redund->nzlocal = nzlocal; 2895 redund->nsends = nsends; 2896 redund->nrecvs = nrecvs; 2897 redund->send_rank = send_rank; 2898 redund->recv_rank = recv_rank; 2899 redund->sbuf_nz = sbuf_nz; 2900 redund->rbuf_nz = rbuf_nz; 2901 redund->sbuf_j = sbuf_j; 2902 redund->sbuf_a = sbuf_a; 2903 redund->rbuf_j = rbuf_j; 2904 redund->rbuf_a = rbuf_a; 2905 redund->psubcomm = NULL; 2906 2907 redund->Destroy = C->ops->destroy; 2908 C->ops->destroy = MatDestroy_MatRedundant; 2909 } 2910 PetscFunctionReturn(0); 2911 } 2912 2913 #undef __FUNCT__ 2914 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2915 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2916 { 2917 PetscErrorCode ierr; 2918 MPI_Comm comm; 2919 PetscMPIInt size,subsize; 2920 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2921 Mat_Redundant *redund=NULL; 2922 PetscSubcomm psubcomm=NULL; 2923 MPI_Comm subcomm_in=subcomm; 2924 Mat *matseq; 2925 IS isrow,iscol; 2926 2927 PetscFunctionBegin; 2928 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2929 if (reuse == MAT_INITIAL_MATRIX) { 2930 /* create psubcomm, then get subcomm */ 2931 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2932 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2933 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2934 2935 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2936 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2937 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2938 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2939 subcomm = psubcomm->comm; 2940 } else { /* retrieve psubcomm and subcomm */ 2941 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2942 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2943 if (subsize == 1) { 2944 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2945 redund = c->redundant; 2946 } else { 2947 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2948 redund = c->redundant; 2949 } 2950 psubcomm = redund->psubcomm; 2951 } 2952 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2953 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2954 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2955 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2956 if (subsize == 1) { 2957 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2958 c->redundant->psubcomm = psubcomm; 2959 } else { 2960 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2961 c->redundant->psubcomm = psubcomm ; 2962 } 2963 } 2964 PetscFunctionReturn(0); 2965 } 2966 } 2967 2968 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2969 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2970 
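  /*
     Worked note on the ownership split below (an explanatory comment, not part of the original code):
     PetscSplitOwnership() divides the M global rows across the processes of subcomm as evenly as
     possible, with any remainder going to the lowest ranks, and the inclusive MPI_Scan() sum then
     gives each process its rend, so rstart = rend - mloc_sub.  For example, M = 10 on a 3-process
     subcomm yields local sizes 4,3,3 and (rstart,rend) = (0,4), (4,7), (7,10).  The two stride
     index sets created from these values select this process's rows and all N columns for the
     MatGetSubMatrices() call below.
  */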
if (reuse == MAT_INITIAL_MATRIX) { 2971 /* create a local sequential matrix matseq[0] */ 2972 mloc_sub = PETSC_DECIDE; 2973 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2974 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2975 rstart = rend - mloc_sub; 2976 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2977 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2978 } else { /* reuse == MAT_REUSE_MATRIX */ 2979 if (subsize == 1) { 2980 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2981 redund = c->redundant; 2982 } else { 2983 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2984 redund = c->redundant; 2985 } 2986 2987 isrow = redund->isrow; 2988 iscol = redund->iscol; 2989 matseq = redund->matseq; 2990 } 2991 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2992 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2993 2994 if (reuse == MAT_INITIAL_MATRIX) { 2995 /* create a supporting struct and attach it to C for reuse */ 2996 ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr); 2997 if (subsize == 1) { 2998 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2999 c->redundant = redund; 3000 } else { 3001 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3002 c->redundant = redund; 3003 } 3004 redund->isrow = isrow; 3005 redund->iscol = iscol; 3006 redund->matseq = matseq; 3007 redund->psubcomm = psubcomm; 3008 redund->Destroy = (*matredundant)->ops->destroy; 3009 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 3010 } 3011 PetscFunctionReturn(0); 3012 } 3013 3014 #undef __FUNCT__ 3015 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 3016 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3017 { 3018 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3019 PetscErrorCode ierr; 3020 PetscInt i,*idxb = 0; 3021 PetscScalar *va,*vb; 3022 Vec vtmp; 3023 3024 PetscFunctionBegin; 3025 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 3026 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3027 if (idx) { 3028 for (i=0; i<A->rmap->n; i++) { 3029 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3030 } 3031 } 3032 3033 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3034 if (idx) { 3035 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3036 } 3037 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3038 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3039 3040 for (i=0; i<A->rmap->n; i++) { 3041 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 3042 va[i] = vb[i]; 3043 if (idx) idx[i] = a->garray[idxb[i]]; 3044 } 3045 } 3046 3047 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3048 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3049 ierr = PetscFree(idxb);CHKERRQ(ierr); 3050 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3051 PetscFunctionReturn(0); 3052 } 3053 3054 #undef __FUNCT__ 3055 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 3056 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3057 { 3058 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3059 PetscErrorCode ierr; 3060 PetscInt i,*idxb = 0; 3061 PetscScalar *va,*vb; 3062 Vec vtmp; 3063 3064 PetscFunctionBegin; 3065 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 3066 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3067 if (idx) { 3068 for (i=0; i<A->cmap->n; i++) { 3069 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3070 } 3071 } 3072 3073 ierr = 
VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3074 if (idx) { 3075 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3076 } 3077 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3078 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3079 3080 for (i=0; i<A->rmap->n; i++) { 3081 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 3082 va[i] = vb[i]; 3083 if (idx) idx[i] = a->garray[idxb[i]]; 3084 } 3085 } 3086 3087 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3088 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3089 ierr = PetscFree(idxb);CHKERRQ(ierr); 3090 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3091 PetscFunctionReturn(0); 3092 } 3093 3094 #undef __FUNCT__ 3095 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 3096 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3097 { 3098 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3099 PetscInt n = A->rmap->n; 3100 PetscInt cstart = A->cmap->rstart; 3101 PetscInt *cmap = mat->garray; 3102 PetscInt *diagIdx, *offdiagIdx; 3103 Vec diagV, offdiagV; 3104 PetscScalar *a, *diagA, *offdiagA; 3105 PetscInt r; 3106 PetscErrorCode ierr; 3107 3108 PetscFunctionBegin; 3109 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3110 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 3111 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 3112 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3113 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3114 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3115 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3116 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3117 for (r = 0; r < n; ++r) { 3118 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3119 a[r] = diagA[r]; 3120 idx[r] = cstart + diagIdx[r]; 3121 } else { 3122 a[r] = offdiagA[r]; 3123 idx[r] = cmap[offdiagIdx[r]]; 3124 } 3125 } 3126 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3127 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3128 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3129 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3130 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3131 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3132 PetscFunctionReturn(0); 3133 } 3134 3135 #undef __FUNCT__ 3136 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3137 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3138 { 3139 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3140 PetscInt n = A->rmap->n; 3141 PetscInt cstart = A->cmap->rstart; 3142 PetscInt *cmap = mat->garray; 3143 PetscInt *diagIdx, *offdiagIdx; 3144 Vec diagV, offdiagV; 3145 PetscScalar *a, *diagA, *offdiagA; 3146 PetscInt r; 3147 PetscErrorCode ierr; 3148 3149 PetscFunctionBegin; 3150 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3151 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3152 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3153 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3154 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3155 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3156 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3157 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3158 for (r = 0; r < n; ++r) { 3159 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3160 a[r] = diagA[r]; 3161 idx[r] = cstart + diagIdx[r]; 3162 } else { 3163 a[r] = offdiagA[r]; 3164 idx[r] = cmap[offdiagIdx[r]]; 3165 } 3166 } 3167 ierr = 
VecRestoreArray(v, &a);CHKERRQ(ierr); 3168 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3169 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3170 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3171 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3172 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3173 PetscFunctionReturn(0); 3174 } 3175 3176 #undef __FUNCT__ 3177 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3178 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3179 { 3180 PetscErrorCode ierr; 3181 Mat *dummy; 3182 3183 PetscFunctionBegin; 3184 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3185 *newmat = *dummy; 3186 ierr = PetscFree(dummy);CHKERRQ(ierr); 3187 PetscFunctionReturn(0); 3188 } 3189 3190 #undef __FUNCT__ 3191 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3192 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3193 { 3194 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3195 PetscErrorCode ierr; 3196 3197 PetscFunctionBegin; 3198 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3199 PetscFunctionReturn(0); 3200 } 3201 3202 #undef __FUNCT__ 3203 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3204 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3205 { 3206 PetscErrorCode ierr; 3207 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3208 3209 PetscFunctionBegin; 3210 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3211 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3212 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3213 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3214 PetscFunctionReturn(0); 3215 } 3216 3217 /* -------------------------------------------------------------------*/ 3218 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3219 MatGetRow_MPIAIJ, 3220 MatRestoreRow_MPIAIJ, 3221 MatMult_MPIAIJ, 3222 /* 4*/ MatMultAdd_MPIAIJ, 3223 MatMultTranspose_MPIAIJ, 3224 MatMultTransposeAdd_MPIAIJ, 3225 #if defined(PETSC_HAVE_PBGL) 3226 MatSolve_MPIAIJ, 3227 #else 3228 0, 3229 #endif 3230 0, 3231 0, 3232 /*10*/ 0, 3233 0, 3234 0, 3235 MatSOR_MPIAIJ, 3236 MatTranspose_MPIAIJ, 3237 /*15*/ MatGetInfo_MPIAIJ, 3238 MatEqual_MPIAIJ, 3239 MatGetDiagonal_MPIAIJ, 3240 MatDiagonalScale_MPIAIJ, 3241 MatNorm_MPIAIJ, 3242 /*20*/ MatAssemblyBegin_MPIAIJ, 3243 MatAssemblyEnd_MPIAIJ, 3244 MatSetOption_MPIAIJ, 3245 MatZeroEntries_MPIAIJ, 3246 /*24*/ MatZeroRows_MPIAIJ, 3247 0, 3248 #if defined(PETSC_HAVE_PBGL) 3249 0, 3250 #else 3251 0, 3252 #endif 3253 0, 3254 0, 3255 /*29*/ MatSetUp_MPIAIJ, 3256 #if defined(PETSC_HAVE_PBGL) 3257 0, 3258 #else 3259 0, 3260 #endif 3261 0, 3262 0, 3263 0, 3264 /*34*/ MatDuplicate_MPIAIJ, 3265 0, 3266 0, 3267 0, 3268 0, 3269 /*39*/ MatAXPY_MPIAIJ, 3270 MatGetSubMatrices_MPIAIJ, 3271 MatIncreaseOverlap_MPIAIJ, 3272 MatGetValues_MPIAIJ, 3273 MatCopy_MPIAIJ, 3274 /*44*/ MatGetRowMax_MPIAIJ, 3275 MatScale_MPIAIJ, 3276 0, 3277 0, 3278 MatZeroRowsColumns_MPIAIJ, 3279 /*49*/ MatSetRandom_MPIAIJ, 3280 0, 3281 0, 3282 0, 3283 0, 3284 /*54*/ MatFDColoringCreate_MPIXAIJ, 3285 0, 3286 MatSetUnfactored_MPIAIJ, 3287 MatPermute_MPIAIJ, 3288 0, 3289 /*59*/ MatGetSubMatrix_MPIAIJ, 3290 MatDestroy_MPIAIJ, 3291 MatView_MPIAIJ, 3292 0, 3293 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3294 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3295 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3296 0, 3297 0, 3298 0, 3299 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3300 MatGetRowMinAbs_MPIAIJ, 3301 0, 3302 MatSetColoring_MPIAIJ, 3303 0, 3304 
MatSetValuesAdifor_MPIAIJ, 3305 /*75*/ MatFDColoringApply_AIJ, 3306 0, 3307 0, 3308 0, 3309 MatFindZeroDiagonals_MPIAIJ, 3310 /*80*/ 0, 3311 0, 3312 0, 3313 /*83*/ MatLoad_MPIAIJ, 3314 0, 3315 0, 3316 0, 3317 0, 3318 0, 3319 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3320 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3321 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3322 MatPtAP_MPIAIJ_MPIAIJ, 3323 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3324 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3325 0, 3326 0, 3327 0, 3328 0, 3329 /*99*/ 0, 3330 0, 3331 0, 3332 MatConjugate_MPIAIJ, 3333 0, 3334 /*104*/MatSetValuesRow_MPIAIJ, 3335 MatRealPart_MPIAIJ, 3336 MatImaginaryPart_MPIAIJ, 3337 0, 3338 0, 3339 /*109*/0, 3340 MatGetRedundantMatrix_MPIAIJ, 3341 MatGetRowMin_MPIAIJ, 3342 0, 3343 0, 3344 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3345 0, 3346 0, 3347 0, 3348 0, 3349 /*119*/0, 3350 0, 3351 0, 3352 0, 3353 MatGetMultiProcBlock_MPIAIJ, 3354 /*124*/MatFindNonzeroRows_MPIAIJ, 3355 MatGetColumnNorms_MPIAIJ, 3356 MatInvertBlockDiagonal_MPIAIJ, 3357 0, 3358 MatGetSubMatricesParallel_MPIAIJ, 3359 /*129*/0, 3360 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3361 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3362 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3363 0, 3364 /*134*/0, 3365 0, 3366 0, 3367 0, 3368 0, 3369 /*139*/0, 3370 0, 3371 0, 3372 MatFDColoringSetUp_MPIXAIJ 3373 }; 3374 3375 /* ----------------------------------------------------------------------------------------*/ 3376 3377 #undef __FUNCT__ 3378 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3379 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3380 { 3381 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3382 PetscErrorCode ierr; 3383 3384 PetscFunctionBegin; 3385 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3386 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3387 PetscFunctionReturn(0); 3388 } 3389 3390 #undef __FUNCT__ 3391 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3392 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3393 { 3394 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3395 PetscErrorCode ierr; 3396 3397 PetscFunctionBegin; 3398 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3399 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3400 PetscFunctionReturn(0); 3401 } 3402 3403 #undef __FUNCT__ 3404 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3405 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3406 { 3407 Mat_MPIAIJ *b; 3408 PetscErrorCode ierr; 3409 3410 PetscFunctionBegin; 3411 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3412 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3413 b = (Mat_MPIAIJ*)B->data; 3414 3415 if (!B->preallocated) { 3416 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3417 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3418 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3419 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3420 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3421 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3422 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3423 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3424 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3425 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3426 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3427 } 3428 3429 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3430 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3431 B->preallocated = PETSC_TRUE; 3432 PetscFunctionReturn(0); 3433 } 3434 3435 #undef __FUNCT__ 3436 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3437 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3438 { 3439 Mat mat; 3440 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3441 PetscErrorCode ierr; 3442 3443 PetscFunctionBegin; 3444 *newmat = 0; 3445 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3446 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3447 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3448 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3449 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3450 a = (Mat_MPIAIJ*)mat->data; 3451 3452 mat->factortype = matin->factortype; 3453 mat->rmap->bs = matin->rmap->bs; 3454 mat->cmap->bs = matin->cmap->bs; 3455 mat->assembled = PETSC_TRUE; 3456 mat->insertmode = NOT_SET_VALUES; 3457 mat->preallocated = PETSC_TRUE; 3458 3459 a->size = oldmat->size; 3460 a->rank = oldmat->rank; 3461 a->donotstash = oldmat->donotstash; 3462 a->roworiented = oldmat->roworiented; 3463 a->rowindices = 0; 3464 a->rowvalues = 0; 3465 a->getrowactive = PETSC_FALSE; 3466 3467 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3468 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3469 3470 if (oldmat->colmap) { 3471 #if defined(PETSC_USE_CTABLE) 3472 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3473 #else 3474 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3475 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3476 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3477 #endif 3478 } else a->colmap = 0; 3479 if (oldmat->garray) { 3480 PetscInt len; 3481 len = oldmat->B->cmap->n; 3482 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3483 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3484 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3485 } else a->garray = 0; 3486 3487 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3488 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3489 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3490 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3491 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3492 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3493 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3494 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3495 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3496 *newmat = mat; 3497 PetscFunctionReturn(0); 3498 } 3499 3500 3501 3502 #undef __FUNCT__ 3503 #define __FUNCT__ "MatLoad_MPIAIJ" 3504 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3505 { 3506 PetscScalar *vals,*svals; 3507 MPI_Comm comm; 3508 PetscErrorCode ierr; 3509 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3510 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3511 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3512 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3513 PetscInt cend,cstart,n,*rowners,sizesset=1; 3514 int fd; 3515 PetscInt bs = 1; 3516 3517 PetscFunctionBegin; 3518 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3519 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3520 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3521 if (!rank) { 3522 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3523 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3524 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3525 } 3526 3527 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3528 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3529 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3530 3531 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3532 3533 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3534 M = header[1]; N = header[2]; 3535 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3536 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3537 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3538 3539 /* If global sizes are set, check if they are consistent with that given in the file */ 3540 if (sizesset) { 3541 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3542 } 3543 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3544 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3545 3546 /* determine ownership of all (block) rows */ 3547 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3548 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3549 else m = newMat->rmap->n; /* Set by user */ 3550 3551 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3552 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3553 3554 /* First process needs enough room for process with most rows */ 3555 if (!rank) { 3556 mmax = rowners[1]; 3557 for (i=2; i<=size; i++) { 3558 mmax = PetscMax(mmax, rowners[i]); 3559 } 3560 } else mmax = -1; /* unused, but compilers complain */ 3561 3562 rowners[0] = 0; 3563 for (i=2; i<=size; i++) { 3564 rowners[i] += 
rowners[i-1]; 3565 } 3566 rstart = rowners[rank]; 3567 rend = rowners[rank+1]; 3568 3569 /* distribute row lengths to all processors */ 3570 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3571 if (!rank) { 3572 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3573 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3574 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3575 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3576 for (j=0; j<m; j++) { 3577 procsnz[0] += ourlens[j]; 3578 } 3579 for (i=1; i<size; i++) { 3580 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3581 /* calculate the number of nonzeros on each processor */ 3582 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3583 procsnz[i] += rowlengths[j]; 3584 } 3585 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3586 } 3587 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3588 } else { 3589 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3590 } 3591 3592 if (!rank) { 3593 /* determine max buffer needed and allocate it */ 3594 maxnz = 0; 3595 for (i=0; i<size; i++) { 3596 maxnz = PetscMax(maxnz,procsnz[i]); 3597 } 3598 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3599 3600 /* read in my part of the matrix column indices */ 3601 nz = procsnz[0]; 3602 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3603 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3604 3605 /* read in every one elses and ship off */ 3606 for (i=1; i<size; i++) { 3607 nz = procsnz[i]; 3608 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3609 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3610 } 3611 ierr = PetscFree(cols);CHKERRQ(ierr); 3612 } else { 3613 /* determine buffer space needed for message */ 3614 nz = 0; 3615 for (i=0; i<m; i++) { 3616 nz += ourlens[i]; 3617 } 3618 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3619 3620 /* receive message of column indices*/ 3621 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3622 } 3623 3624 /* determine column ownership if matrix is not square */ 3625 if (N != M) { 3626 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3627 else n = newMat->cmap->n; 3628 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3629 cstart = cend - n; 3630 } else { 3631 cstart = rstart; 3632 cend = rend; 3633 n = cend - cstart; 3634 } 3635 3636 /* loop over local rows, determining number of off diagonal entries */ 3637 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3638 jj = 0; 3639 for (i=0; i<m; i++) { 3640 for (j=0; j<ourlens[i]; j++) { 3641 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3642 jj++; 3643 } 3644 } 3645 3646 for (i=0; i<m; i++) { 3647 ourlens[i] -= offlens[i]; 3648 } 3649 if (!sizesset) { 3650 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3651 } 3652 3653 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3654 3655 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3656 3657 for (i=0; i<m; i++) { 3658 ourlens[i] += offlens[i]; 3659 } 3660 3661 if (!rank) { 3662 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3663 3664 /* read in my part of the matrix numerical values */ 3665 nz = procsnz[0]; 3666 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3667 3668 /* insert into matrix */ 3669 jj = rstart; 3670 
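    /*
       Insert rank 0's own rows: ourlens[] was restored to the full row lengths
       (diagonal plus off-diagonal counts) just above, so smycols and svals advance
       by one complete row of column indices and values per iteration, while jj
       tracks the global row number starting at rstart.
    */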
smycols = mycols; 3671 svals = vals; 3672 for (i=0; i<m; i++) { 3673 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3674 smycols += ourlens[i]; 3675 svals += ourlens[i]; 3676 jj++; 3677 } 3678 3679 /* read in other processors and ship out */ 3680 for (i=1; i<size; i++) { 3681 nz = procsnz[i]; 3682 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3683 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3684 } 3685 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3686 } else { 3687 /* receive numeric values */ 3688 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3689 3690 /* receive message of values*/ 3691 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3692 3693 /* insert into matrix */ 3694 jj = rstart; 3695 smycols = mycols; 3696 svals = vals; 3697 for (i=0; i<m; i++) { 3698 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3699 smycols += ourlens[i]; 3700 svals += ourlens[i]; 3701 jj++; 3702 } 3703 } 3704 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3705 ierr = PetscFree(vals);CHKERRQ(ierr); 3706 ierr = PetscFree(mycols);CHKERRQ(ierr); 3707 ierr = PetscFree(rowners);CHKERRQ(ierr); 3708 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3709 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3710 PetscFunctionReturn(0); 3711 } 3712 3713 #undef __FUNCT__ 3714 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3715 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3716 { 3717 PetscErrorCode ierr; 3718 IS iscol_local; 3719 PetscInt csize; 3720 3721 PetscFunctionBegin; 3722 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3723 if (call == MAT_REUSE_MATRIX) { 3724 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3725 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3726 } else { 3727 PetscInt cbs; 3728 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3729 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3730 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3731 } 3732 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3733 if (call == MAT_INITIAL_MATRIX) { 3734 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3735 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3736 } 3737 PetscFunctionReturn(0); 3738 } 3739 3740 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3741 #undef __FUNCT__ 3742 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3743 /* 3744 Not great since it makes two copies of the submatrix, first an SeqAIJ 3745 in local and then by concatenating the local matrices the end result. 3746 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3747 3748 Note: This requires a sequential iscol with all indices. 
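      In the MAT_INITIAL_MATRIX path, MatGetSubMatrix_MPIAIJ() above builds such an iscol by
      gathering the distributed column index set with ISAllGather() and caches it on the new
      matrix under the name "ISAllGather", which is what the MAT_REUSE_MATRIX path queries for.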
3749 */ 3750 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3751 { 3752 PetscErrorCode ierr; 3753 PetscMPIInt rank,size; 3754 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3755 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3756 PetscBool allcolumns, colflag; 3757 Mat M,Mreuse; 3758 MatScalar *vwork,*aa; 3759 MPI_Comm comm; 3760 Mat_SeqAIJ *aij; 3761 3762 PetscFunctionBegin; 3763 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3764 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3765 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3766 3767 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3768 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3769 if (colflag && ncol == mat->cmap->N) { 3770 allcolumns = PETSC_TRUE; 3771 } else { 3772 allcolumns = PETSC_FALSE; 3773 } 3774 if (call == MAT_REUSE_MATRIX) { 3775 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3776 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3777 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3778 } else { 3779 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3780 } 3781 3782 /* 3783 m - number of local rows 3784 n - number of columns (same on all processors) 3785 rstart - first row in new global matrix generated 3786 */ 3787 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3788 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3789 if (call == MAT_INITIAL_MATRIX) { 3790 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3791 ii = aij->i; 3792 jj = aij->j; 3793 3794 /* 3795 Determine the number of non-zeros in the diagonal and off-diagonal 3796 portions of the matrix in order to do correct preallocation 3797 */ 3798 3799 /* first get start and end of "diagonal" columns */ 3800 if (csize == PETSC_DECIDE) { 3801 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3802 if (mglobal == n) { /* square matrix */ 3803 nlocal = m; 3804 } else { 3805 nlocal = n/size + ((n % size) > rank); 3806 } 3807 } else { 3808 nlocal = csize; 3809 } 3810 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3811 rstart = rend - nlocal; 3812 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3813 3814 /* next, compute all the lengths */ 3815 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3816 olens = dlens + m; 3817 for (i=0; i<m; i++) { 3818 jend = ii[i+1] - ii[i]; 3819 olen = 0; 3820 dlen = 0; 3821 for (j=0; j<jend; j++) { 3822 if (*jj < rstart || *jj >= rend) olen++; 3823 else dlen++; 3824 jj++; 3825 } 3826 olens[i] = olen; 3827 dlens[i] = dlen; 3828 } 3829 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3830 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3831 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3832 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3833 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3834 ierr = PetscFree(dlens);CHKERRQ(ierr); 3835 } else { 3836 PetscInt ml,nl; 3837 3838 M = *newmat; 3839 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3840 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3841 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3842 /* 
3843 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3844 rather than the slower MatSetValues(). 3845 */ 3846 M->was_assembled = PETSC_TRUE; 3847 M->assembled = PETSC_FALSE; 3848 } 3849 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3850 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3851 ii = aij->i; 3852 jj = aij->j; 3853 aa = aij->a; 3854 for (i=0; i<m; i++) { 3855 row = rstart + i; 3856 nz = ii[i+1] - ii[i]; 3857 cwork = jj; jj += nz; 3858 vwork = aa; aa += nz; 3859 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3860 } 3861 3862 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3863 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3864 *newmat = M; 3865 3866 /* save submatrix used in processor for next request */ 3867 if (call == MAT_INITIAL_MATRIX) { 3868 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3869 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3870 } 3871 PetscFunctionReturn(0); 3872 } 3873 3874 #undef __FUNCT__ 3875 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3876 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3877 { 3878 PetscInt m,cstart, cend,j,nnz,i,d; 3879 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3880 const PetscInt *JJ; 3881 PetscScalar *values; 3882 PetscErrorCode ierr; 3883 3884 PetscFunctionBegin; 3885 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3886 3887 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3888 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3889 m = B->rmap->n; 3890 cstart = B->cmap->rstart; 3891 cend = B->cmap->rend; 3892 rstart = B->rmap->rstart; 3893 3894 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3895 3896 #if defined(PETSC_USE_DEBUGGING) 3897 for (i=0; i<m; i++) { 3898 nnz = Ii[i+1]- Ii[i]; 3899 JJ = J + Ii[i]; 3900 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3901 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3902 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3903 } 3904 #endif 3905 3906 for (i=0; i<m; i++) { 3907 nnz = Ii[i+1]- Ii[i]; 3908 JJ = J + Ii[i]; 3909 nnz_max = PetscMax(nnz_max,nnz); 3910 d = 0; 3911 for (j=0; j<nnz; j++) { 3912 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3913 } 3914 d_nnz[i] = d; 3915 o_nnz[i] = nnz - d; 3916 } 3917 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3918 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3919 3920 if (v) values = (PetscScalar*)v; 3921 else { 3922 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3923 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3924 } 3925 3926 for (i=0; i<m; i++) { 3927 ii = i + rstart; 3928 nnz = Ii[i+1]- Ii[i]; 3929 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3930 } 3931 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3932 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3933 3934 if (!v) { 3935 ierr = PetscFree(values);CHKERRQ(ierr); 3936 } 3937 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3938 PetscFunctionReturn(0); 3939 } 3940 3941 #undef __FUNCT__ 3942 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3943 /*@ 3944 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3945 (the default parallel PETSc format). 3946 3947 Collective on MPI_Comm 3948 3949 Input Parameters: 3950 + B - the matrix 3951 . i - the indices into j for the start of each local row (starts with zero) 3952 . j - the column indices for each local row (starts with zero) 3953 - v - optional values in the matrix 3954 3955 Level: developer 3956 3957 Notes: 3958 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3959 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3960 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3961 3962 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3963 3964 The format which is used for the sparse matrix input, is equivalent to a 3965 row-major ordering.. i.e for the following matrix, the input data expected is 3966 as shown: 3967 3968 1 0 0 3969 2 0 3 P0 3970 ------- 3971 4 5 6 P1 3972 3973 Process0 [P0]: rows_owned=[0,1] 3974 i = {0,1,3} [size = nrow+1 = 2+1] 3975 j = {0,0,2} [size = nz = 6] 3976 v = {1,2,3} [size = nz = 6] 3977 3978 Process1 [P1]: rows_owned=[2] 3979 i = {0,3} [size = nrow+1 = 1+1] 3980 j = {0,1,2} [size = nz = 6] 3981 v = {4,5,6} [size = nz = 6] 3982 3983 .keywords: matrix, aij, compressed row, sparse, parallel 3984 3985 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3986 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3987 @*/ 3988 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3989 { 3990 PetscErrorCode ierr; 3991 3992 PetscFunctionBegin; 3993 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3994 PetscFunctionReturn(0); 3995 } 3996 3997 #undef __FUNCT__ 3998 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3999 /*@C 4000 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4001 (the default parallel PETSc format). For good matrix assembly performance 4002 the user should preallocate the matrix storage by setting the parameters 4003 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4004 performance can be increased by more than a factor of 50. 4005 4006 Collective on MPI_Comm 4007 4008 Input Parameters: 4009 + A - the matrix 4010 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4011 (same value is used for all local rows) 4012 . d_nnz - array containing the number of nonzeros in the various rows of the 4013 DIAGONAL portion of the local submatrix (possibly different for each row) 4014 or NULL, if d_nz is used to specify the nonzero structure. 4015 The size of this array is equal to the number of local rows, i.e 'm'. 
4016 For matrices that will be factored, you must leave room for (and set) 4017 the diagonal entry even if it is zero. 4018 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4019 submatrix (same value is used for all local rows). 4020 - o_nnz - array containing the number of nonzeros in the various rows of the 4021 OFF-DIAGONAL portion of the local submatrix (possibly different for 4022 each row) or NULL, if o_nz is used to specify the nonzero 4023 structure. The size of this array is equal to the number 4024 of local rows, i.e 'm'. 4025 4026 If the *_nnz parameter is given then the *_nz parameter is ignored 4027 4028 The AIJ format (also called the Yale sparse matrix format or 4029 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4030 storage. The stored row and column indices begin with zero. 4031 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 4032 4033 The parallel matrix is partitioned such that the first m0 rows belong to 4034 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4035 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4036 4037 The DIAGONAL portion of the local submatrix of a processor can be defined 4038 as the submatrix which is obtained by extraction the part corresponding to 4039 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4040 first row that belongs to the processor, r2 is the last row belonging to 4041 the this processor, and c1-c2 is range of indices of the local part of a 4042 vector suitable for applying the matrix to. This is an mxn matrix. In the 4043 common case of a square matrix, the row and column ranges are the same and 4044 the DIAGONAL part is also square. The remaining portion of the local 4045 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4046 4047 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4048 4049 You can call MatGetInfo() to get information on how effective the preallocation was; 4050 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4051 You can also run with the option -info and look for messages with the string 4052 malloc in them to see if additional memory allocation was needed. 4053 4054 Example usage: 4055 4056 Consider the following 8x8 matrix with 34 non-zero values, that is 4057 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4058 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4059 as follows: 4060 4061 .vb 4062 1 2 0 | 0 3 0 | 0 4 4063 Proc0 0 5 6 | 7 0 0 | 8 0 4064 9 0 10 | 11 0 0 | 12 0 4065 ------------------------------------- 4066 13 0 14 | 15 16 17 | 0 0 4067 Proc1 0 18 0 | 19 20 21 | 0 0 4068 0 0 0 | 22 23 0 | 24 0 4069 ------------------------------------- 4070 Proc2 25 26 27 | 0 0 28 | 29 0 4071 30 0 0 | 31 32 33 | 0 34 4072 .ve 4073 4074 This can be represented as a collection of submatrices as: 4075 4076 .vb 4077 A B C 4078 D E F 4079 G H I 4080 .ve 4081 4082 Where the submatrices A,B,C are owned by proc0, D,E,F are 4083 owned by proc1, G,H,I are owned by proc2. 4084 4085 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4086 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4087 The 'M','N' parameters are 8,8, and have the same values on all procs. 4088 4089 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4090 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 4091 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4092 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4093 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4094 matrix, ans [DF] as another SeqAIJ matrix. 4095 4096 When d_nz, o_nz parameters are specified, d_nz storage elements are 4097 allocated for every row of the local diagonal submatrix, and o_nz 4098 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4099 One way to choose d_nz and o_nz is to use the max nonzerors per local 4100 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4101 In this case, the values of d_nz,o_nz are: 4102 .vb 4103 proc0 : dnz = 2, o_nz = 2 4104 proc1 : dnz = 3, o_nz = 2 4105 proc2 : dnz = 1, o_nz = 4 4106 .ve 4107 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4108 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4109 for proc3. i.e we are using 12+15+10=37 storage locations to store 4110 34 values. 4111 4112 When d_nnz, o_nnz parameters are specified, the storage is specified 4113 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4114 In the above case the values for d_nnz,o_nnz are: 4115 .vb 4116 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4117 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4118 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4119 .ve 4120 Here the space allocated is sum of all the above values i.e 34, and 4121 hence pre-allocation is perfect. 4122 4123 Level: intermediate 4124 4125 .keywords: matrix, aij, compressed row, sparse, parallel 4126 4127 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4128 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4129 @*/ 4130 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4131 { 4132 PetscErrorCode ierr; 4133 4134 PetscFunctionBegin; 4135 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4136 PetscValidType(B,1); 4137 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4138 PetscFunctionReturn(0); 4139 } 4140 4141 #undef __FUNCT__ 4142 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4143 /*@ 4144 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4145 CSR format the local rows. 4146 4147 Collective on MPI_Comm 4148 4149 Input Parameters: 4150 + comm - MPI communicator 4151 . m - number of local rows (Cannot be PETSC_DECIDE) 4152 . n - This value should be the same as the local size used in creating the 4153 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4154 calculated if N is given) For square matrices n is almost always m. 4155 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4156 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4157 . i - row indices 4158 . j - column indices 4159 - a - matrix values 4160 4161 Output Parameter: 4162 . mat - the matrix 4163 4164 Level: intermediate 4165 4166 Notes: 4167 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4168 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4169 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
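   A minimal calling sketch (the communicator comm, local row count m, global column count N,
   and the CSR arrays i[], j[], a[] are placeholders the caller is assumed to own already):

.vb
   Mat A;
   ierr = MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,&A);CHKERRQ(ierr);
   ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve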
4170 4171 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4172 4173 The format which is used for the sparse matrix input, is equivalent to a 4174 row-major ordering.. i.e for the following matrix, the input data expected is 4175 as shown: 4176 4177 1 0 0 4178 2 0 3 P0 4179 ------- 4180 4 5 6 P1 4181 4182 Process0 [P0]: rows_owned=[0,1] 4183 i = {0,1,3} [size = nrow+1 = 2+1] 4184 j = {0,0,2} [size = nz = 6] 4185 v = {1,2,3} [size = nz = 6] 4186 4187 Process1 [P1]: rows_owned=[2] 4188 i = {0,3} [size = nrow+1 = 1+1] 4189 j = {0,1,2} [size = nz = 6] 4190 v = {4,5,6} [size = nz = 6] 4191 4192 .keywords: matrix, aij, compressed row, sparse, parallel 4193 4194 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4195 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4196 @*/ 4197 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4198 { 4199 PetscErrorCode ierr; 4200 4201 PetscFunctionBegin; 4202 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4203 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4204 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4205 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4206 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4207 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4208 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4209 PetscFunctionReturn(0); 4210 } 4211 4212 #undef __FUNCT__ 4213 #define __FUNCT__ "MatCreateAIJ" 4214 /*@C 4215 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4216 (the default parallel PETSc format). For good matrix assembly performance 4217 the user should preallocate the matrix storage by setting the parameters 4218 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4219 performance can be increased by more than a factor of 50. 4220 4221 Collective on MPI_Comm 4222 4223 Input Parameters: 4224 + comm - MPI communicator 4225 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4226 This value should be the same as the local size used in creating the 4227 y vector for the matrix-vector product y = Ax. 4228 . n - This value should be the same as the local size used in creating the 4229 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4230 calculated if N is given) For square matrices n is almost always m. 4231 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4232 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4233 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4234 (same value is used for all local rows) 4235 . d_nnz - array containing the number of nonzeros in the various rows of the 4236 DIAGONAL portion of the local submatrix (possibly different for each row) 4237 or NULL, if d_nz is used to specify the nonzero structure. 4238 The size of this array is equal to the number of local rows, i.e 'm'. 4239 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4240 submatrix (same value is used for all local rows). 
4241 - o_nnz - array containing the number of nonzeros in the various rows of the 4242 OFF-DIAGONAL portion of the local submatrix (possibly different for 4243 each row) or NULL, if o_nz is used to specify the nonzero 4244 structure. The size of this array is equal to the number 4245 of local rows, i.e 'm'. 4246 4247 Output Parameter: 4248 . A - the matrix 4249 4250 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4251 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4252 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4253 4254 Notes: 4255 If the *_nnz parameter is given then the *_nz parameter is ignored 4256 4257 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4258 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4259 storage requirements for this matrix. 4260 4261 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4262 processor than it must be used on all processors that share the object for 4263 that argument. 4264 4265 The user MUST specify either the local or global matrix dimensions 4266 (possibly both). 4267 4268 The parallel matrix is partitioned across processors such that the 4269 first m0 rows belong to process 0, the next m1 rows belong to 4270 process 1, the next m2 rows belong to process 2 etc.. where 4271 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4272 values corresponding to [m x N] submatrix. 4273 4274 The columns are logically partitioned with the n0 columns belonging 4275 to 0th partition, the next n1 columns belonging to the next 4276 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4277 4278 The DIAGONAL portion of the local submatrix on any given processor 4279 is the submatrix corresponding to the rows and columns m,n 4280 corresponding to the given processor. i.e diagonal matrix on 4281 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4282 etc. The remaining portion of the local submatrix [m x (N-n)] 4283 constitute the OFF-DIAGONAL portion. The example below better 4284 illustrates this concept. 4285 4286 For a square global matrix we define each processor's diagonal portion 4287 to be its local rows and the corresponding columns (a square submatrix); 4288 each processor's off-diagonal portion encompasses the remainder of the 4289 local matrix (a rectangular submatrix). 4290 4291 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4292 4293 When calling this routine with a single process communicator, a matrix of 4294 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4295 type of communicator, use the construction mechanism: 4296 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4297 4298 By default, this format uses inodes (identical nodes) when possible. 4299 We search for consecutive rows with the same nonzero structure, thereby 4300 reusing matrix information to achieve increased efficiency. 4301 4302 Options Database Keys: 4303 + -mat_no_inode - Do not use inodes 4304 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4305 - -mat_aij_oneindex - Internally use indexing starting at 1 4306 rather than 0. Note that when calling MatSetValues(), 4307 the user still MUST index entries starting at 0! 4308 4309 4310 Example usage: 4311 4312 Consider the following 8x8 matrix with 34 non-zero values, that is 4313 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4314 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4315 as follows: 4316 4317 .vb 4318 1 2 0 | 0 3 0 | 0 4 4319 Proc0 0 5 6 | 7 0 0 | 8 0 4320 9 0 10 | 11 0 0 | 12 0 4321 ------------------------------------- 4322 13 0 14 | 15 16 17 | 0 0 4323 Proc1 0 18 0 | 19 20 21 | 0 0 4324 0 0 0 | 22 23 0 | 24 0 4325 ------------------------------------- 4326 Proc2 25 26 27 | 0 0 28 | 29 0 4327 30 0 0 | 31 32 33 | 0 34 4328 .ve 4329 4330 This can be represented as a collection of submatrices as: 4331 4332 .vb 4333 A B C 4334 D E F 4335 G H I 4336 .ve 4337 4338 Where the submatrices A,B,C are owned by proc0, D,E,F are 4339 owned by proc1, G,H,I are owned by proc2. 4340 4341 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4342 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4343 The 'M','N' parameters are 8,8, and have the same values on all procs. 4344 4345 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4346 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4347 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4348 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4349 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4350 matrix, ans [DF] as another SeqAIJ matrix. 4351 4352 When d_nz, o_nz parameters are specified, d_nz storage elements are 4353 allocated for every row of the local diagonal submatrix, and o_nz 4354 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4355 One way to choose d_nz and o_nz is to use the max nonzerors per local 4356 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4357 In this case, the values of d_nz,o_nz are: 4358 .vb 4359 proc0 : dnz = 2, o_nz = 2 4360 proc1 : dnz = 3, o_nz = 2 4361 proc2 : dnz = 1, o_nz = 4 4362 .ve 4363 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4364 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4365 for proc3. i.e we are using 12+15+10=37 storage locations to store 4366 34 values. 4367 4368 When d_nnz, o_nnz parameters are specified, the storage is specified 4369 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4370 In the above case the values for d_nnz,o_nnz are: 4371 .vb 4372 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4373 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4374 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4375 .ve 4376 Here the space allocated is sum of all the above values i.e 34, and 4377 hence pre-allocation is perfect. 
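   A short sketch of the recommended construction sequence mentioned above (m, n and the
   preallocation arguments are placeholders supplied by the caller):

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve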
4378 4379 Level: intermediate 4380 4381 .keywords: matrix, aij, compressed row, sparse, parallel 4382 4383 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4384 MPIAIJ, MatCreateMPIAIJWithArrays() 4385 @*/ 4386 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4387 { 4388 PetscErrorCode ierr; 4389 PetscMPIInt size; 4390 4391 PetscFunctionBegin; 4392 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4393 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4394 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4395 if (size > 1) { 4396 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4397 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4398 } else { 4399 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4400 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4401 } 4402 PetscFunctionReturn(0); 4403 } 4404 4405 #undef __FUNCT__ 4406 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4407 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4408 { 4409 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4410 4411 PetscFunctionBegin; 4412 *Ad = a->A; 4413 *Ao = a->B; 4414 *colmap = a->garray; 4415 PetscFunctionReturn(0); 4416 } 4417 4418 #undef __FUNCT__ 4419 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4420 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4421 { 4422 PetscErrorCode ierr; 4423 PetscInt i; 4424 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4425 4426 PetscFunctionBegin; 4427 if (coloring->ctype == IS_COLORING_GLOBAL) { 4428 ISColoringValue *allcolors,*colors; 4429 ISColoring ocoloring; 4430 4431 /* set coloring for diagonal portion */ 4432 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4433 4434 /* set coloring for off-diagonal portion */ 4435 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4436 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4437 for (i=0; i<a->B->cmap->n; i++) { 4438 colors[i] = allcolors[a->garray[i]]; 4439 } 4440 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4441 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4442 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4443 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4444 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4445 ISColoringValue *colors; 4446 PetscInt *larray; 4447 ISColoring ocoloring; 4448 4449 /* set coloring for diagonal portion */ 4450 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4451 for (i=0; i<a->A->cmap->n; i++) { 4452 larray[i] = i + A->cmap->rstart; 4453 } 4454 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4455 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4456 for (i=0; i<a->A->cmap->n; i++) { 4457 colors[i] = coloring->colors[larray[i]]; 4458 } 4459 ierr = PetscFree(larray);CHKERRQ(ierr); 4460 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4461 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4462 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4463 4464 /* set coloring for off-diagonal portion */ 4465 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4466 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4467 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4468 for (i=0; i<a->B->cmap->n; i++) { 4469 colors[i] = coloring->colors[larray[i]]; 4470 } 4471 ierr = PetscFree(larray);CHKERRQ(ierr); 4472 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4473 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4474 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4475 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4476 PetscFunctionReturn(0); 4477 } 4478 4479 #undef __FUNCT__ 4480 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4481 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4482 { 4483 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4484 PetscErrorCode ierr; 4485 4486 PetscFunctionBegin; 4487 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4488 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4489 PetscFunctionReturn(0); 4490 } 4491 4492 #undef __FUNCT__ 4493 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4494 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4495 { 4496 PetscErrorCode ierr; 4497 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4498 PetscInt *indx; 4499 4500 PetscFunctionBegin; 4501 /* This routine will ONLY return MPIAIJ type matrix */ 4502 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4503 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4504 if (n == PETSC_DECIDE) { 4505 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4506 } 4507 /* Check sum(n) = N */ 4508 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4509 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4510 4511 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4512 rstart -= m; 4513 4514 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4515 for (i=0; i<m; i++) { 4516 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4517 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4518 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4519 } 4520 4521 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4522 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4523 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4524 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4525 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4526 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4527 PetscFunctionReturn(0); 4528 } 4529 4530 #undef __FUNCT__ 4531 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4532 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4533 { 4534 PetscErrorCode ierr; 4535 PetscInt m,N,i,rstart,nnz,Ii; 4536 PetscInt *indx; 4537 PetscScalar *values; 4538 4539 PetscFunctionBegin; 4540 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4541 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4542 for (i=0; i<m; i++) { 4543 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4544 Ii = i + rstart; 4545 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4546 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4547 } 4548 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4549 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4550 PetscFunctionReturn(0); 4551 } 4552 4553 #undef __FUNCT__ 4554 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4555 /*@ 4556 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4557 matrices from each processor 4558 4559 Collective on MPI_Comm 4560 4561 Input Parameters: 4562 + comm - the communicators the parallel matrix will live on 4563 . inmat - the input sequential matrices 4564 . n - number of local columns (or PETSC_DECIDE) 4565 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4566 4567 Output Parameter: 4568 . outmat - the parallel matrix generated 4569 4570 Level: advanced 4571 4572 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4573 4574 @*/ 4575 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4576 { 4577 PetscErrorCode ierr; 4578 PetscMPIInt size; 4579 4580 PetscFunctionBegin; 4581 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4582 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4583 if (size == 1) { 4584 if (scall == MAT_INITIAL_MATRIX) { 4585 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4586 } else { 4587 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4588 } 4589 } else { 4590 if (scall == MAT_INITIAL_MATRIX) { 4591 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4592 } 4593 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4594 } 4595 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4596 PetscFunctionReturn(0); 4597 } 4598 4599 #undef __FUNCT__ 4600 #define __FUNCT__ "MatFileSplit" 4601 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4602 { 4603 PetscErrorCode ierr; 4604 PetscMPIInt rank; 4605 PetscInt m,N,i,rstart,nnz; 4606 size_t len; 4607 const PetscInt *indx; 4608 PetscViewer out; 4609 char *name; 4610 Mat B; 4611 const PetscScalar *values; 4612 4613 PetscFunctionBegin; 4614 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4615 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4616 /* Should this be the type of the diagonal block of A? 
*/ 4617 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4618 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4619 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4620 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4621 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4622 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4623 for (i=0; i<m; i++) { 4624 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4625 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4626 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4627 } 4628 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4629 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4630 4631 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4632 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4633 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4634 sprintf(name,"%s.%d",outfile,rank); 4635 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4636 ierr = PetscFree(name);CHKERRQ(ierr); 4637 ierr = MatView(B,out);CHKERRQ(ierr); 4638 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4639 ierr = MatDestroy(&B);CHKERRQ(ierr); 4640 PetscFunctionReturn(0); 4641 } 4642 4643 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4644 #undef __FUNCT__ 4645 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4646 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4647 { 4648 PetscErrorCode ierr; 4649 Mat_Merge_SeqsToMPI *merge; 4650 PetscContainer container; 4651 4652 PetscFunctionBegin; 4653 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4654 if (container) { 4655 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4656 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4657 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4658 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4659 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4660 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4661 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4662 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4663 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4664 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4665 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4666 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4667 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4668 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4669 ierr = PetscFree(merge);CHKERRQ(ierr); 4670 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4671 } 4672 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4673 PetscFunctionReturn(0); 4674 } 4675 4676 #include <../src/mat/utils/freespace.h> 4677 #include <petscbt.h> 4678 4679 #undef __FUNCT__ 4680 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4681 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4682 { 4683 PetscErrorCode ierr; 4684 MPI_Comm comm; 4685 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4686 PetscMPIInt size,rank,taga,*len_s; 4687 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4688 PetscInt proc,m; 4689 PetscInt **buf_ri,**buf_rj; 4690 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4691 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4692 MPI_Request *s_waits,*r_waits; 4693 MPI_Status *status; 4694 MatScalar *aa=a->a; 4695 MatScalar **abuf_r,*ba_i; 4696 Mat_Merge_SeqsToMPI *merge; 4697 PetscContainer container; 4698 4699 PetscFunctionBegin; 4700 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4701 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4702 4703 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4704 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4705 4706 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4707 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4708 4709 bi = merge->bi; 4710 bj = merge->bj; 4711 buf_ri = merge->buf_ri; 4712 buf_rj = merge->buf_rj; 4713 4714 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4715 owners = merge->rowmap->range; 4716 len_s = merge->len_s; 4717 4718 /* send and recv matrix values */ 4719 /*-----------------------------*/ 4720 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4721 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4722 4723 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4724 for (proc=0,k=0; proc<size; proc++) { 4725 if (!len_s[proc]) continue; 4726 i = owners[proc]; 4727 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4728 k++; 4729 } 4730 4731 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4732 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4733 ierr = PetscFree(status);CHKERRQ(ierr); 4734 4735 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4736 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4737 4738 /* insert mat values of mpimat */ 4739 /*----------------------------*/ 4740 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4741 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4742 4743 for (k=0; k<merge->nrecv; k++) { 4744 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4745 nrows = *(buf_ri_k[k]); 4746 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4747 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4748 } 4749 4750 /* set values of ba */ 4751 m = merge->rowmap->n; 4752 for (i=0; i<m; i++) { 4753 arow = owners[rank] + i; 4754 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4755 bnzi = bi[i+1] - bi[i]; 4756 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4757 4758 /* add local non-zero vals of this proc's seqmat into ba */ 4759 anzi = ai[arow+1] - ai[arow]; 4760 aj = a->j + ai[arow]; 4761 aa = a->a + ai[arow]; 4762 nextaj = 0; 4763 for (j=0; nextaj<anzi; j++) { 4764 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4765 ba_i[j] += aa[nextaj++]; 4766 } 4767 } 4768 4769 /* add received vals into ba */ 4770 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4771 /* i-th row */ 4772 if (i == *nextrow[k]) { 4773 anzi = *(nextai[k]+1) - *nextai[k]; 4774 aj = buf_rj[k] + *(nextai[k]); 4775 aa = abuf_r[k] + *(nextai[k]); 4776 nextaj = 0; 4777 for (j=0; nextaj<anzi; j++) { 4778 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4779 ba_i[j] += aa[nextaj++]; 4780 } 4781 } 4782 nextrow[k]++; nextai[k]++; 4783 } 4784 } 4785 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4786 } 4787 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4788 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4789 4790 ierr = 
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4791 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4792 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4793 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4794 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4795 PetscFunctionReturn(0); 4796 } 4797 4798 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4799 4800 #undef __FUNCT__ 4801 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4802 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4803 { 4804 PetscErrorCode ierr; 4805 Mat B_mpi; 4806 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4807 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4808 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4809 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4810 PetscInt len,proc,*dnz,*onz,bs,cbs; 4811 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4812 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4813 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4814 MPI_Status *status; 4815 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4816 PetscBT lnkbt; 4817 Mat_Merge_SeqsToMPI *merge; 4818 PetscContainer container; 4819 4820 PetscFunctionBegin; 4821 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4822 4823 /* make sure it is a PETSc comm */ 4824 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4825 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4826 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4827 4828 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4829 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4830 4831 /* determine row ownership */ 4832 /*---------------------------------------------------------*/ 4833 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4834 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4835 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4836 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4837 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4838 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4839 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4840 4841 m = merge->rowmap->n; 4842 owners = merge->rowmap->range; 4843 4844 /* determine the number of messages to send, their lengths */ 4845 /*---------------------------------------------------------*/ 4846 len_s = merge->len_s; 4847 4848 len = 0; /* length of buf_si[] */ 4849 merge->nsend = 0; 4850 for (proc=0; proc<size; proc++) { 4851 len_si[proc] = 0; 4852 if (proc == rank) { 4853 len_s[proc] = 0; 4854 } else { 4855 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4856 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4857 } 4858 if (len_s[proc]) { 4859 merge->nsend++; 4860 nrows = 0; 4861 for (i=owners[proc]; i<owners[proc+1]; i++) { 4862 if (ai[i+1] > ai[i]) nrows++; 4863 } 4864 len_si[proc] = 2*(nrows+1); 4865 len += len_si[proc]; 4866 } 4867 } 4868 4869 /* determine the number and length of messages to receive for ij-structure */ 4870 /*-------------------------------------------------------------------------*/ 4871 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4872 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4873 4874 /* post the Irecv of j-structure */ 4875 
/*-------------------------------*/ 4876 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4877 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4878 4879 /* post the Isend of j-structure */ 4880 /*--------------------------------*/ 4881 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4882 4883 for (proc=0, k=0; proc<size; proc++) { 4884 if (!len_s[proc]) continue; 4885 i = owners[proc]; 4886 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4887 k++; 4888 } 4889 4890 /* receives and sends of j-structure are complete */ 4891 /*------------------------------------------------*/ 4892 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4893 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4894 4895 /* send and recv i-structure */ 4896 /*---------------------------*/ 4897 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4898 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4899 4900 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4901 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4902 for (proc=0,k=0; proc<size; proc++) { 4903 if (!len_s[proc]) continue; 4904 /* form outgoing message for i-structure: 4905 buf_si[0]: nrows to be sent 4906 [1:nrows]: row index (global) 4907 [nrows+1:2*nrows+1]: i-structure index 4908 */ 4909 /*-------------------------------------------*/ 4910 nrows = len_si[proc]/2 - 1; 4911 buf_si_i = buf_si + nrows+1; 4912 buf_si[0] = nrows; 4913 buf_si_i[0] = 0; 4914 nrows = 0; 4915 for (i=owners[proc]; i<owners[proc+1]; i++) { 4916 anzi = ai[i+1] - ai[i]; 4917 if (anzi) { 4918 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4919 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4920 nrows++; 4921 } 4922 } 4923 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4924 k++; 4925 buf_si += len_si[proc]; 4926 } 4927 4928 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4929 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4930 4931 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4932 for (i=0; i<merge->nrecv; i++) { 4933 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4934 } 4935 4936 ierr = PetscFree(len_si);CHKERRQ(ierr); 4937 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4938 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4939 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4940 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4941 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4942 ierr = PetscFree(status);CHKERRQ(ierr); 4943 4944 /* compute a local seq matrix in each processor */ 4945 /*----------------------------------------------*/ 4946 /* allocate bi array and free space for accumulating nonzero column info */ 4947 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4948 bi[0] = 0; 4949 4950 /* create and initialize a linked list */ 4951 nlnk = N+1; 4952 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4953 4954 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4955 len = ai[owners[rank+1]] - ai[owners[rank]]; 4956 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4957 4958 
current_space = free_space; 4959 4960 /* determine symbolic info for each local row */ 4961 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4962 4963 for (k=0; k<merge->nrecv; k++) { 4964 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4965 nrows = *buf_ri_k[k]; 4966 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4967 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4968 } 4969 4970 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4971 len = 0; 4972 for (i=0; i<m; i++) { 4973 bnzi = 0; 4974 /* add local non-zero cols of this proc's seqmat into lnk */ 4975 arow = owners[rank] + i; 4976 anzi = ai[arow+1] - ai[arow]; 4977 aj = a->j + ai[arow]; 4978 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4979 bnzi += nlnk; 4980 /* add received col data into lnk */ 4981 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4982 if (i == *nextrow[k]) { /* i-th row */ 4983 anzi = *(nextai[k]+1) - *nextai[k]; 4984 aj = buf_rj[k] + *nextai[k]; 4985 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4986 bnzi += nlnk; 4987 nextrow[k]++; nextai[k]++; 4988 } 4989 } 4990 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4991 4992 /* if free space is not available, make more free space */ 4993 if (current_space->local_remaining<bnzi) { 4994 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4995 nspacedouble++; 4996 } 4997 /* copy data into free space, then initialize lnk */ 4998 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4999 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5000 5001 current_space->array += bnzi; 5002 current_space->local_used += bnzi; 5003 current_space->local_remaining -= bnzi; 5004 5005 bi[i+1] = bi[i] + bnzi; 5006 } 5007 5008 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5009 5010 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 5011 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5012 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5013 5014 /* create symbolic parallel matrix B_mpi */ 5015 /*---------------------------------------*/ 5016 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5017 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5018 if (n==PETSC_DECIDE) { 5019 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5020 } else { 5021 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5022 } 5023 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5024 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5025 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5026 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5027 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5028 5029 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5030 B_mpi->assembled = PETSC_FALSE; 5031 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5032 merge->bi = bi; 5033 merge->bj = bj; 5034 merge->buf_ri = buf_ri; 5035 merge->buf_rj = buf_rj; 5036 merge->coi = NULL; 5037 merge->coj = NULL; 5038 merge->owners_co = NULL; 5039 5040 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5041 5042 /* attach the supporting struct to B_mpi for reuse */ 5043 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5044 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5045 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5046 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5047 *mpimat = B_mpi; 5048 5049 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5050 PetscFunctionReturn(0); 5051 } 5052 5053 #undef __FUNCT__ 5054 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 5055 /*@C 5056 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 5057 matrices from each processor 5058 5059 Collective on MPI_Comm 5060 5061 Input Parameters: 5062 + comm - the communicators the parallel matrix will live on 5063 . seqmat - the input sequential matrices 5064 . m - number of local rows (or PETSC_DECIDE) 5065 . n - number of local columns (or PETSC_DECIDE) 5066 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5067 5068 Output Parameter: 5069 . mpimat - the parallel matrix generated 5070 5071 Level: advanced 5072 5073 Notes: 5074 The dimensions of the sequential matrix in each processor MUST be the same. 5075 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5076 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5077 @*/ 5078 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5079 { 5080 PetscErrorCode ierr; 5081 PetscMPIInt size; 5082 5083 PetscFunctionBegin; 5084 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5085 if (size == 1) { 5086 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5087 if (scall == MAT_INITIAL_MATRIX) { 5088 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5089 } else { 5090 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5091 } 5092 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5093 PetscFunctionReturn(0); 5094 } 5095 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5096 if (scall == MAT_INITIAL_MATRIX) { 5097 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5098 } 5099 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5100 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5101 PetscFunctionReturn(0); 5102 } 5103 5104 #undef __FUNCT__ 5105 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5106 /*@ 5107 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5108 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5109 with MatGetSize() 5110 5111 Not Collective 5112 5113 Input Parameters: 5114 + A - the matrix 5115 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5116 5117 Output Parameter: 5118 . 
A_loc - the local sequential matrix generated 5119 5120 Level: developer 5121 5122 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5123 5124 @*/ 5125 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5126 { 5127 PetscErrorCode ierr; 5128 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5129 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5130 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5131 MatScalar *aa=a->a,*ba=b->a,*cam; 5132 PetscScalar *ca; 5133 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5134 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5135 PetscBool match; 5136 5137 PetscFunctionBegin; 5138 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5139 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5140 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5141 if (scall == MAT_INITIAL_MATRIX) { 5142 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5143 ci[0] = 0; 5144 for (i=0; i<am; i++) { 5145 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5146 } 5147 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5148 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5149 k = 0; 5150 for (i=0; i<am; i++) { 5151 ncols_o = bi[i+1] - bi[i]; 5152 ncols_d = ai[i+1] - ai[i]; 5153 /* off-diagonal portion of A */ 5154 for (jo=0; jo<ncols_o; jo++) { 5155 col = cmap[*bj]; 5156 if (col >= cstart) break; 5157 cj[k] = col; bj++; 5158 ca[k++] = *ba++; 5159 } 5160 /* diagonal portion of A */ 5161 for (j=0; j<ncols_d; j++) { 5162 cj[k] = cstart + *aj++; 5163 ca[k++] = *aa++; 5164 } 5165 /* off-diagonal portion of A */ 5166 for (j=jo; j<ncols_o; j++) { 5167 cj[k] = cmap[*bj++]; 5168 ca[k++] = *ba++; 5169 } 5170 } 5171 /* put together the new matrix */ 5172 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5173 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5174 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5175 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5176 mat->free_a = PETSC_TRUE; 5177 mat->free_ij = PETSC_TRUE; 5178 mat->nonew = 0; 5179 } else if (scall == MAT_REUSE_MATRIX) { 5180 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5181 ci = mat->i; cj = mat->j; cam = mat->a; 5182 for (i=0; i<am; i++) { 5183 /* off-diagonal portion of A */ 5184 ncols_o = bi[i+1] - bi[i]; 5185 for (jo=0; jo<ncols_o; jo++) { 5186 col = cmap[*bj]; 5187 if (col >= cstart) break; 5188 *cam++ = *ba++; bj++; 5189 } 5190 /* diagonal portion of A */ 5191 ncols_d = ai[i+1] - ai[i]; 5192 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5193 /* off-diagonal portion of A */ 5194 for (j=jo; j<ncols_o; j++) { 5195 *cam++ = *ba++; bj++; 5196 } 5197 } 5198 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5199 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5200 PetscFunctionReturn(0); 5201 } 5202 5203 #undef __FUNCT__ 5204 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5205 /*@C 5206 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5207 5208 Not Collective 5209 5210 Input Parameters: 5211 + A - the matrix 5212 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5213 - row, col - index sets of rows and columns to extract (or NULL) 5214 5215 Output Parameter: 5216 . 
A_loc - the local sequential matrix generated 5217 5218 Level: developer 5219 5220 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5221 5222 @*/ 5223 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5224 { 5225 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5226 PetscErrorCode ierr; 5227 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5228 IS isrowa,iscola; 5229 Mat *aloc; 5230 PetscBool match; 5231 5232 PetscFunctionBegin; 5233 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5234 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5235 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5236 if (!row) { 5237 start = A->rmap->rstart; end = A->rmap->rend; 5238 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5239 } else { 5240 isrowa = *row; 5241 } 5242 if (!col) { 5243 start = A->cmap->rstart; 5244 cmap = a->garray; 5245 nzA = a->A->cmap->n; 5246 nzB = a->B->cmap->n; 5247 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5248 ncols = 0; 5249 for (i=0; i<nzB; i++) { 5250 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5251 else break; 5252 } 5253 imark = i; 5254 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5255 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5256 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5257 } else { 5258 iscola = *col; 5259 } 5260 if (scall != MAT_INITIAL_MATRIX) { 5261 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5262 aloc[0] = *A_loc; 5263 } 5264 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5265 *A_loc = aloc[0]; 5266 ierr = PetscFree(aloc);CHKERRQ(ierr); 5267 if (!row) { 5268 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5269 } 5270 if (!col) { 5271 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5272 } 5273 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5274 PetscFunctionReturn(0); 5275 } 5276 5277 #undef __FUNCT__ 5278 #define __FUNCT__ "MatGetBrowsOfAcols" 5279 /*@C 5280 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5281 5282 Collective on Mat 5283 5284 Input Parameters: 5285 + A,B - the matrices in mpiaij format 5286 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5287 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5288 5289 Output Parameter: 5290 + rowb, colb - index sets of rows and columns of B to extract 5291 - B_seq - the sequential matrix generated 5292 5293 Level: developer 5294 5295 @*/ 5296 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5297 { 5298 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5299 PetscErrorCode ierr; 5300 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5301 IS isrowb,iscolb; 5302 Mat *bseq=NULL; 5303 5304 PetscFunctionBegin; 5305 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5306 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5307 } 5308 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5309 5310 if (scall == MAT_INITIAL_MATRIX) { 5311 start = A->cmap->rstart; 5312 cmap = a->garray; 5313 nzA = a->A->cmap->n; 5314 nzB = a->B->cmap->n; 5315 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5316 ncols = 0; 5317 for (i=0; i<nzB; i++) { /* row < local row index */ 5318 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5319 else break; 5320 } 5321 imark = i; 5322 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5323 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5324 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5325 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5326 } else { 5327 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5328 isrowb = *rowb; iscolb = *colb; 5329 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5330 bseq[0] = *B_seq; 5331 } 5332 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5333 *B_seq = bseq[0]; 5334 ierr = PetscFree(bseq);CHKERRQ(ierr); 5335 if (!rowb) { 5336 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5337 } else { 5338 *rowb = isrowb; 5339 } 5340 if (!colb) { 5341 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5342 } else { 5343 *colb = iscolb; 5344 } 5345 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5346 PetscFunctionReturn(0); 5347 } 5348 5349 #undef __FUNCT__ 5350 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5351 /* 5352 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5353 of the OFF-DIAGONAL portion of local A 5354 5355 Collective on Mat 5356 5357 Input Parameters: 5358 + A,B - the matrices in mpiaij format 5359 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5360 5361 Output Parameter: 5362 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5363 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5364 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5365 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5366 5367 Level: developer 5368 5369 */ 5370 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5371 { 5372 VecScatter_MPI_General *gen_to,*gen_from; 5373 PetscErrorCode ierr; 5374 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5375 Mat_SeqAIJ *b_oth; 5376 VecScatter ctx =a->Mvctx; 5377 MPI_Comm comm; 5378 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5379 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5380 PetscScalar *rvalues,*svalues; 5381 MatScalar *b_otha,*bufa,*bufA; 5382 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5383 MPI_Request *rwaits = NULL,*swaits = NULL; 5384 MPI_Status *sstatus,rstatus; 5385 PetscMPIInt jj; 5386 PetscInt *cols,sbs,rbs; 5387 PetscScalar *vals; 5388 5389 PetscFunctionBegin; 5390 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5391 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5392 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5393 } 5394 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5395 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5396 5397 gen_to = (VecScatter_MPI_General*)ctx->todata; 5398 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5399 rvalues = gen_from->values; /* holds the length of receiving row */ 5400 svalues = gen_to->values; /* holds the length of sending row */ 5401 nrecvs = gen_from->n; 5402 nsends = gen_to->n; 5403 5404 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5405 srow = gen_to->indices; /* local row index to be sent */ 5406 sstarts = gen_to->starts; 5407 sprocs = gen_to->procs; 5408 sstatus = gen_to->sstatus; 5409 sbs = gen_to->bs; 5410 rstarts = gen_from->starts; 5411 rprocs = gen_from->procs; 5412 rbs = gen_from->bs; 5413 5414 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5415 if (scall == MAT_INITIAL_MATRIX) { 5416 /* i-array */ 5417 /*---------*/ 5418 /* post receives */ 5419 for (i=0; i<nrecvs; i++) { 5420 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5421 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5422 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5423 } 5424 5425 /* pack the outgoing message */ 5426 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5427 5428 sstartsj[0] = 0; 5429 rstartsj[0] = 0; 5430 len = 0; /* total length of j or a array to be sent */ 5431 k = 0; 5432 for (i=0; i<nsends; i++) { 5433 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5434 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5435 for (j=0; j<nrows; j++) { 5436 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5437 for (l=0; l<sbs; l++) { 5438 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5439 5440 rowlen[j*sbs+l] = ncols; 5441 5442 len += ncols; 5443 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5444 } 5445 k++; 5446 } 5447 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5448 5449 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5450 } 5451 /* recvs and sends of i-array are completed */ 5452 i = nrecvs; 5453 while (i--) { 5454 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5455 } 5456 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5457 5458 /* allocate buffers for sending j and a arrays */ 5459 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5460 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5461 5462 /* create i-array of B_oth */ 5463 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5464 5465 b_othi[0] = 0; 5466 len = 0; /* total length of j or a array to be received */ 5467 k = 0; 5468 for (i=0; i<nrecvs; i++) { 5469 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5470 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5471 for (j=0; j<nrows; j++) { 5472 b_othi[k+1] = b_othi[k] + rowlen[j]; 5473 len += rowlen[j]; k++; 5474 } 5475 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5476 } 5477 5478 /* allocate space for j and a arrrays of B_oth */ 5479 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5480 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5481 5482 /* j-array */ 5483 /*---------*/ 5484 /* post receives of j-array */ 5485 for (i=0; i<nrecvs; i++) { 5486 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5487 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5488 } 5489 5490 /* pack the outgoing message j-array */ 5491 k = 0; 5492 for (i=0; i<nsends; i++) { 5493 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5494 bufJ = bufj+sstartsj[i]; 5495 for (j=0; j<nrows; j++) { 5496 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5497 for (ll=0; ll<sbs; ll++) { 5498 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5499 for (l=0; l<ncols; l++) { 5500 *bufJ++ = cols[l]; 5501 } 5502 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5503 } 5504 } 5505 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5506 } 5507 5508 /* recvs and sends of j-array are completed */ 5509 i = nrecvs; 5510 while (i--) { 5511 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5512 } 5513 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5514 } else if (scall == MAT_REUSE_MATRIX) { 5515 sstartsj = *startsj_s; 5516 rstartsj = *startsj_r; 5517 bufa = *bufa_ptr; 5518 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5519 b_otha = b_oth->a; 5520 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5521 5522 /* a-array */ 5523 /*---------*/ 5524 /* post receives of a-array */ 5525 for (i=0; i<nrecvs; i++) { 5526 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5527 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5528 } 5529 5530 /* pack the outgoing message a-array */ 5531 k = 0; 5532 for (i=0; i<nsends; i++) { 5533 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5534 bufA = bufa+sstartsj[i]; 5535 for (j=0; j<nrows; j++) { 5536 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5537 for (ll=0; ll<sbs; ll++) { 5538 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5539 for (l=0; l<ncols; l++) { 5540 *bufA++ = vals[l]; 5541 } 5542 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5543 } 5544 } 5545 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5546 } 5547 /* recvs and sends of a-array are completed */ 5548 i = nrecvs; 5549 while (i--) { 5550 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5551 } 5552 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5553 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5554 5555 if (scall == MAT_INITIAL_MATRIX) { 5556 /* put together the new matrix */ 5557 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5558 5559 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5560 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5561 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5562 b_oth->free_a = PETSC_TRUE; 5563 b_oth->free_ij = PETSC_TRUE; 5564 b_oth->nonew = 0; 5565 5566 ierr = PetscFree(bufj);CHKERRQ(ierr); 5567 if (!startsj_s || !bufa_ptr) { 5568 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5569 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5570 } else { 5571 *startsj_s = sstartsj; 5572 *startsj_r = rstartsj; 5573 *bufa_ptr = bufa; 5574 } 5575 } 5576 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5577 PetscFunctionReturn(0); 5578 } 5579 5580 #undef __FUNCT__ 5581 #define __FUNCT__ "MatGetCommunicationStructs" 5582 /*@C 5583 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5584 5585 Not Collective 5586 5587 Input Parameters: 5588 . A - The matrix in mpiaij format 5589 5590 Output Parameter: 5591 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5592 . 
colmap - A map from global column index to local index into lvec 5593 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5594 5595 Level: developer 5596 5597 @*/ 5598 #if defined(PETSC_USE_CTABLE) 5599 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5600 #else 5601 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5602 #endif 5603 { 5604 Mat_MPIAIJ *a; 5605 5606 PetscFunctionBegin; 5607 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5608 PetscValidPointer(lvec, 2); 5609 PetscValidPointer(colmap, 3); 5610 PetscValidPointer(multScatter, 4); 5611 a = (Mat_MPIAIJ*) A->data; 5612 if (lvec) *lvec = a->lvec; 5613 if (colmap) *colmap = a->colmap; 5614 if (multScatter) *multScatter = a->Mvctx; 5615 PetscFunctionReturn(0); 5616 } 5617 5618 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5619 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5620 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5621 5622 #undef __FUNCT__ 5623 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5624 /* 5625 Computes (B'*A')' since computing B*A directly is untenable 5626 5627 n p p 5628 ( ) ( ) ( ) 5629 m ( A ) * n ( B ) = m ( C ) 5630 ( ) ( ) ( ) 5631 5632 */ 5633 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5634 { 5635 PetscErrorCode ierr; 5636 Mat At,Bt,Ct; 5637 5638 PetscFunctionBegin; 5639 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5640 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5641 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5642 ierr = MatDestroy(&At);CHKERRQ(ierr); 5643 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5644 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5645 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5646 PetscFunctionReturn(0); 5647 } 5648 5649 #undef __FUNCT__ 5650 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5651 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5652 { 5653 PetscErrorCode ierr; 5654 PetscInt m=A->rmap->n,n=B->cmap->n; 5655 Mat Cmat; 5656 5657 PetscFunctionBegin; 5658 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5659 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5660 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5661 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5662 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5663 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5664 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5665 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5666 5667 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5668 5669 *C = Cmat; 5670 PetscFunctionReturn(0); 5671 } 5672 5673 /* ----------------------------------------------------------------*/ 5674 #undef __FUNCT__ 5675 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5676 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5677 { 5678 PetscErrorCode ierr; 5679 5680 PetscFunctionBegin; 5681 if (scall == MAT_INITIAL_MATRIX) { 5682 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5683 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5684 ierr = 
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5685 } 5686 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5687 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5688 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5689 PetscFunctionReturn(0); 5690 } 5691 5692 #if defined(PETSC_HAVE_MUMPS) 5693 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5694 #endif 5695 #if defined(PETSC_HAVE_PASTIX) 5696 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5697 #endif 5698 #if defined(PETSC_HAVE_SUPERLU_DIST) 5699 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5700 #endif 5701 #if defined(PETSC_HAVE_CLIQUE) 5702 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5703 #endif 5704 5705 /*MC 5706 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5707 5708 Options Database Keys: 5709 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5710 5711 Level: beginner 5712 5713 .seealso: MatCreateAIJ() 5714 M*/ 5715 5716 #undef __FUNCT__ 5717 #define __FUNCT__ "MatCreate_MPIAIJ" 5718 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5719 { 5720 Mat_MPIAIJ *b; 5721 PetscErrorCode ierr; 5722 PetscMPIInt size; 5723 5724 PetscFunctionBegin; 5725 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5726 5727 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5728 B->data = (void*)b; 5729 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5730 B->assembled = PETSC_FALSE; 5731 B->insertmode = NOT_SET_VALUES; 5732 b->size = size; 5733 5734 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5735 5736 /* build cache for off array entries formed */ 5737 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5738 5739 b->donotstash = PETSC_FALSE; 5740 b->colmap = 0; 5741 b->garray = 0; 5742 b->roworiented = PETSC_TRUE; 5743 5744 /* stuff used for matrix vector multiply */ 5745 b->lvec = NULL; 5746 b->Mvctx = NULL; 5747 5748 /* stuff for MatGetRow() */ 5749 b->rowindices = 0; 5750 b->rowvalues = 0; 5751 b->getrowactive = PETSC_FALSE; 5752 5753 /* flexible pointer used in CUSP/CUSPARSE classes */ 5754 b->spptr = NULL; 5755 5756 #if defined(PETSC_HAVE_MUMPS) 5757 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5758 #endif 5759 #if defined(PETSC_HAVE_PASTIX) 5760 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5761 #endif 5762 #if defined(PETSC_HAVE_SUPERLU_DIST) 5763 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5764 #endif 5765 #if defined(PETSC_HAVE_CLIQUE) 5766 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5767 #endif 5768 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5769 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5770 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5771 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5772 
ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5773 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5774 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5775 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5776 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5777 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5778 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5779 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5780 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5781 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5782 PetscFunctionReturn(0); 5783 } 5784 5785 #undef __FUNCT__ 5786 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5787 /*@ 5788 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5789 and "off-diagonal" part of the matrix in CSR format. 5790 5791 Collective on MPI_Comm 5792 5793 Input Parameters: 5794 + comm - MPI communicator 5795 . m - number of local rows (Cannot be PETSC_DECIDE) 5796 . n - This value should be the same as the local size used in creating the 5797 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5798 calculated if N is given) For square matrices n is almost always m. 5799 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5800 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5801 . i - row indices for "diagonal" portion of matrix 5802 . j - column indices 5803 . a - matrix values 5804 . oi - row indices for "off-diagonal" portion of matrix 5805 . oj - column indices 5806 - oa - matrix values 5807 5808 Output Parameter: 5809 . mat - the matrix 5810 5811 Level: advanced 5812 5813 Notes: 5814 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5815 must free the arrays once the matrix has been destroyed and not before. 5816 5817 The i and j indices are 0 based 5818 5819 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5820 5821 This sets local rows and cannot be used to set off-processor values. 5822 5823 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5824 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5825 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5826 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5827 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5828 communication if it is known that only local entries will be set. 
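   A rough sketch of this recommended alternative is shown below; ncols, cols, and vals stand
   for whatever column indices and values the application computes for each of its local rows:
.vb
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve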

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

/* Map the Fortran binding name onto the compiler's name-mangling convention */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
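    /* Columns in this process's ownership range [cstart,cend) are inserted into the "diagonal" block aij->A;
       all other columns go into the "off-diagonal" block aij->B */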
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;   /* refresh ba before computing ap2: B was rebuilt by MatDisAssemble_MPIAIJ() */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}