#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
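/*
   Example of typical usage (an illustrative sketch only; M, N and the nnz estimates are
   placeholders, error checking omitted).  The same code runs on one or many processes, and
   both preallocation routines are called as recommended above:

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATAIJ);
      MatSetFromOptions(A);
      MatSeqAIJSetPreallocation(A,5,NULL);            used when the communicator has one process
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);     used when it has more than one
*/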
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
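/*
   Caller-side sketch (illustrative only): the public routine MatGetColumnNorms() dispatches to
   the function above for MPIAIJ matrices; the caller supplies an array with one entry per
   global column:

      PetscInt  N;
      PetscReal *norms;
      MatGetSize(A,NULL,&N);
      PetscMalloc1(N,&norms);
      MatGetColumnNorms(A,NORM_2,norms);
      PetscFree(norms);
*/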
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
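/*
   Descriptive note on the routine above: with MAT_INITIAL_MATRIX process 0 ships row lengths,
   then column indices, then numerical values to each process, and every process calls
   MatSetValues() on its own rows.  The array ld[], which records for each local row how many
   off-diagonal entries lie to the left of the diagonal block, is stashed in the Mat_MPIAIJ so
   that a later MAT_REUSE_MATRIX call can copy freshly received values straight into the
   A (diagonal) and B (off-diagonal) blocks without re-sending the structure.
*/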
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
b_noinsert: ; \
    bilen[row] = nrow2; \
  }
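/*
   Descriptive note on the two macros above: they implement the inner loop of MatSetValues()
   for the diagonal (A) and off-diagonal (B) SeqAIJ blocks.  A short binary search narrows the
   column range of the current row, the value is added or overwritten in place if the slot
   already exists, and otherwise the row is grown (honoring the nonew/ignorezeroentries
   options) and the later entries of the row are shifted up one position.  They deliberately
   read and update local variables of the calling function (rp1/ap1/nrow1/... for A,
   rp2/ap2/nrow2/... for B) rather than receiving them as arguments.
*/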
"MatSetValuesRow_MPIAIJ" 451 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 452 { 453 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 454 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 455 PetscErrorCode ierr; 456 PetscInt l,*garray = mat->garray,diag; 457 458 PetscFunctionBegin; 459 /* code only works for square matrices A */ 460 461 /* find size of row to the left of the diagonal part */ 462 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 463 row = row - diag; 464 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 465 if (garray[b->j[b->i[row]+l]] > diag) break; 466 } 467 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* diagonal part */ 470 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 471 472 /* right of diagonal part */ 473 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "MatSetValues_MPIAIJ" 479 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 480 { 481 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 482 PetscScalar value; 483 PetscErrorCode ierr; 484 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 485 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 486 PetscBool roworiented = aij->roworiented; 487 488 /* Some Variables required in the macro */ 489 Mat A = aij->A; 490 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 491 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 492 MatScalar *aa = a->a; 493 PetscBool ignorezeroentries = a->ignorezeroentries; 494 Mat B = aij->B; 495 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 496 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 497 MatScalar *ba = b->a; 498 499 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 500 PetscInt nonew; 501 MatScalar *ap1,*ap2; 502 503 PetscFunctionBegin; 504 if (v) PetscValidScalarPointer(v,6); 505 for (i=0; i<m; i++) { 506 if (im[i] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 509 #endif 510 if (im[i] >= rstart && im[i] < rend) { 511 row = im[i] - rstart; 512 lastcol1 = -1; 513 rp1 = aj + ai[row]; 514 ap1 = aa + ai[row]; 515 rmax1 = aimax[row]; 516 nrow1 = ailen[row]; 517 low1 = 0; 518 high1 = nrow1; 519 lastcol2 = -1; 520 rp2 = bj + bi[row]; 521 ap2 = ba + bi[row]; 522 rmax2 = bimax[row]; 523 nrow2 = bilen[row]; 524 low2 = 0; 525 high2 = nrow2; 526 527 for (j=0; j<n; j++) { 528 if (v) { 529 if (roworiented) value = v[i*n+j]; 530 else value = v[i+j*m]; 531 } else value = 0.0; 532 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 533 if (in[j] >= cstart && in[j] < cend) { 534 col = in[j] - cstart; 535 nonew = a->nonew; 536 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 537 } else if (in[j] < 0) continue; 538 #if defined(PETSC_USE_DEBUG) 539 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 540 #endif 541 else { 542 if (mat->was_assembled) { 543 if (!aij->colmap) { 544 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 545 } 546 #if 
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
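/*
   Assembly overview (descriptive note): values destined for locally owned rows are inserted
   directly into the A/B blocks by MatSetValues_MPIAIJ() above, while values for rows owned by
   other processes are collected in mat->stash; MatAssemblyBegin/End below ship the stashed
   entries to their owners and insert them there.  From the caller's side this is the usual
   sequence (illustrative sketch only):

      MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);   possibly for off-process rows
      ...
      MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/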
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscMPIInt    size    = mat->size;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found          = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0, 0);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found          = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
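/*
   Descriptive note on MatMult_MPIAIJ() above: the scatter of the needed off-process entries of
   xx into a->lvec is started first, the purely local product with the diagonal block A is
   computed while that communication is in flight, and only then is the scatter completed and
   the off-diagonal block B applied with a multiply-add, so communication and computation
   overlap.  The MultAdd and MultTranspose variants below follow the same idea (the transpose
   versions scatter in the reverse direction).
*/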
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
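/*
   Descriptive note on the binary viewer below: the matrix is written in three passes, first the
   row lengths, then the global column indices, then the numerical values.  In each pass process
   0 writes its own data and then receives and writes the data of every other process in rank
   order, with the PetscViewerFlowControl calls limiting how many processes may be sending at
   once so that process 0 is not flooded with messages.
*/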
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* on process 0 this needs to be as large as the largest number of nonzeros on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
      if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
    }

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1399 ct = cols; 1400 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1401 for (i=0; i<m; i++) { 1402 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1403 row++; 1404 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1405 } 1406 ierr = PetscFree(ct);CHKERRQ(ierr); 1407 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1408 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1409 /* 1410 Everyone has to call to draw the matrix since the graphics waits are 1411 synchronized across all processors that share the PetscDraw object 1412 */ 1413 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1414 if (!rank) { 1415 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1416 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1417 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1418 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1419 } 1420 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1421 ierr = MatDestroy(&A);CHKERRQ(ierr); 1422 } 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatView_MPIAIJ" 1428 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1429 { 1430 PetscErrorCode ierr; 1431 PetscBool iascii,isdraw,issocket,isbinary; 1432 1433 PetscFunctionBegin; 1434 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1435 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1436 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1437 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1438 if (iascii || isdraw || isbinary || issocket) { 1439 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1440 } 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "MatSOR_MPIAIJ" 1446 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1447 { 1448 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1449 PetscErrorCode ierr; 1450 Vec bb1 = 0; 1451 PetscBool hasop; 1452 1453 PetscFunctionBegin; 1454 if (flag == SOR_APPLY_UPPER) { 1455 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1456 PetscFunctionReturn(0); 1457 } 1458 1459 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1460 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1461 } 1462 1463 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1464 if (flag & SOR_ZERO_INITIAL_GUESS) { 1465 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1466 its--; 1467 } 1468 1469 while (its--) { 1470 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1471 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1472 1473 /* update rhs: bb1 = bb - B*x */ 1474 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1475 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1476 1477 /* local sweep */ 1478 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1479 } 1480 } else if (flag & 
SOR_LOCAL_FORWARD_SWEEP) { 1481 if (flag & SOR_ZERO_INITIAL_GUESS) { 1482 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1483 its--; 1484 } 1485 while (its--) { 1486 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1488 1489 /* update rhs: bb1 = bb - B*x */ 1490 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1491 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1492 1493 /* local sweep */ 1494 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1495 } 1496 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1497 if (flag & SOR_ZERO_INITIAL_GUESS) { 1498 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1499 its--; 1500 } 1501 while (its--) { 1502 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1504 1505 /* update rhs: bb1 = bb - B*x */ 1506 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1507 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1508 1509 /* local sweep */ 1510 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1511 } 1512 } else if (flag & SOR_EISENSTAT) { 1513 Vec xx1; 1514 1515 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1516 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1517 1518 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 if (!mat->diag) { 1521 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1522 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1523 } 1524 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1525 if (hasop) { 1526 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1527 } else { 1528 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1529 } 1530 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1531 1532 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1533 1534 /* local sweep */ 1535 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1536 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1537 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1538 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1539 1540 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatPermute_MPIAIJ" 1546 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1547 { 1548 Mat aA,aB,Aperm; 1549 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1550 PetscScalar *aa,*ba; 1551 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1552 PetscSF rowsf,sf; 1553 IS parcolp = NULL; 1554 PetscBool done; 1555 PetscErrorCode ierr; 1556 1557 PetscFunctionBegin; 1558 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1559 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1560 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1561 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1562 1563 
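/* Added sketch (assuming the usual MatPermute() convention that rowp[i] is the old row placed at new row i):
   the code below inverts the permutation with a PetscSF reduce rather than an explicit search. On a single
   process with rowp = {2,0,1}, each leaf i points at root rowp[i] and deposits its own global row number
   (work[i] = rstart + i), so the reduce yields rdest = {1,2,0}; old row j is later inserted into new row
   rdest[j] by MatSetValues(). The same pattern is reused for the columns and for the ghost columns of the
   off-diagonal block. */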
/* Invert row permutation to find out where my rows should go */ 1564 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1565 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1566 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1567 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1568 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1569 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1570 1571 /* Invert column permutation to find out where my columns should go */ 1572 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1573 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1574 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1575 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1576 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1577 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1578 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1579 1580 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1581 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1582 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1583 1584 /* Find out where my gcols should go */ 1585 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1586 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1587 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1588 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1589 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1590 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1591 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1592 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1593 1594 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1595 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1596 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1597 for (i=0; i<m; i++) { 1598 PetscInt row = rdest[i],rowner; 1599 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1600 for (j=ai[i]; j<ai[i+1]; j++) { 1601 PetscInt cowner,col = cdest[aj[j]]; 1602 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1603 if (rowner == cowner) dnnz[i]++; 1604 else onnz[i]++; 1605 } 1606 for (j=bi[i]; j<bi[i+1]; j++) { 1607 PetscInt cowner,col = gcdest[bj[j]]; 1608 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1609 if (rowner == cowner) dnnz[i]++; 1610 else onnz[i]++; 1611 } 1612 } 1613 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1614 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1615 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1616 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1617 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1618 1619 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1620 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1621 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1622 for (i=0; i<m; i++) { 1623 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1624 PetscInt j0,rowlen; 1625 rowlen = ai[i+1] - ai[i]; 1626 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be 
larger than number of rows m, so sum in batches */ 1627 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1628 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1629 } 1630 rowlen = bi[i+1] - bi[i]; 1631 for (j0=j=0; j<rowlen; j0=j) { 1632 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1633 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1634 } 1635 } 1636 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1637 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1638 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1639 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1640 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1641 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1642 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1643 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1644 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1645 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1646 *B = Aperm; 1647 PetscFunctionReturn(0); 1648 } 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1652 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1653 { 1654 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1655 Mat A = mat->A,B = mat->B; 1656 PetscErrorCode ierr; 1657 PetscReal isend[5],irecv[5]; 1658 1659 PetscFunctionBegin; 1660 info->block_size = 1.0; 1661 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1662 1663 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1664 isend[3] = info->memory; isend[4] = info->mallocs; 1665 1666 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1667 1668 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1669 isend[3] += info->memory; isend[4] += info->mallocs; 1670 if (flag == MAT_LOCAL) { 1671 info->nz_used = isend[0]; 1672 info->nz_allocated = isend[1]; 1673 info->nz_unneeded = isend[2]; 1674 info->memory = isend[3]; 1675 info->mallocs = isend[4]; 1676 } else if (flag == MAT_GLOBAL_MAX) { 1677 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1678 1679 info->nz_used = irecv[0]; 1680 info->nz_allocated = irecv[1]; 1681 info->nz_unneeded = irecv[2]; 1682 info->memory = irecv[3]; 1683 info->mallocs = irecv[4]; 1684 } else if (flag == MAT_GLOBAL_SUM) { 1685 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1686 1687 info->nz_used = irecv[0]; 1688 info->nz_allocated = irecv[1]; 1689 info->nz_unneeded = irecv[2]; 1690 info->memory = irecv[3]; 1691 info->mallocs = irecv[4]; 1692 } 1693 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1694 info->fill_ratio_needed = 0; 1695 info->factor_mallocs = 0; 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatSetOption_MPIAIJ" 1701 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1702 { 1703 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1704 PetscErrorCode ierr; 1705 1706 PetscFunctionBegin; 1707 switch (op) { 1708 case MAT_NEW_NONZERO_LOCATIONS: 1709 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1710 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1711 case MAT_KEEP_NONZERO_PATTERN: 1712 case MAT_NEW_NONZERO_LOCATION_ERR: 1713 case MAT_USE_INODES: 1714 case MAT_IGNORE_ZERO_ENTRIES: 1715 MatCheckPreallocated(A,1); 
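/* added note: each option in the case group above has a purely local meaning, so it is simply forwarded to
   both the on-diagonal (a->A) and off-diagonal (a->B) sequential blocks */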
1716 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1717 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1718 break; 1719 case MAT_ROW_ORIENTED: 1720 a->roworiented = flg; 1721 1722 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1723 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1724 break; 1725 case MAT_NEW_DIAGONALS: 1726 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1727 break; 1728 case MAT_IGNORE_OFF_PROC_ENTRIES: 1729 a->donotstash = flg; 1730 break; 1731 case MAT_SPD: 1732 A->spd_set = PETSC_TRUE; 1733 A->spd = flg; 1734 if (flg) { 1735 A->symmetric = PETSC_TRUE; 1736 A->structurally_symmetric = PETSC_TRUE; 1737 A->symmetric_set = PETSC_TRUE; 1738 A->structurally_symmetric_set = PETSC_TRUE; 1739 } 1740 break; 1741 case MAT_SYMMETRIC: 1742 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743 break; 1744 case MAT_STRUCTURALLY_SYMMETRIC: 1745 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1746 break; 1747 case MAT_HERMITIAN: 1748 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1749 break; 1750 case MAT_SYMMETRY_ETERNAL: 1751 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1752 break; 1753 default: 1754 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1755 } 1756 PetscFunctionReturn(0); 1757 } 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatGetRow_MPIAIJ" 1761 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1762 { 1763 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1764 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1765 PetscErrorCode ierr; 1766 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1767 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1768 PetscInt *cmap,*idx_p; 1769 1770 PetscFunctionBegin; 1771 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1772 mat->getrowactive = PETSC_TRUE; 1773 1774 if (!mat->rowvalues && (idx || v)) { 1775 /* 1776 allocate enough space to hold information from the longest row. 
1777 */ 1778 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1779 PetscInt max = 1,tmp; 1780 for (i=0; i<matin->rmap->n; i++) { 1781 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1782 if (max < tmp) max = tmp; 1783 } 1784 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1785 } 1786 1787 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1788 lrow = row - rstart; 1789 1790 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1791 if (!v) {pvA = 0; pvB = 0;} 1792 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1793 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1794 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1795 nztot = nzA + nzB; 1796 1797 cmap = mat->garray; 1798 if (v || idx) { 1799 if (nztot) { 1800 /* Sort by increasing column numbers, assuming A and B already sorted */ 1801 PetscInt imark = -1; 1802 if (v) { 1803 *v = v_p = mat->rowvalues; 1804 for (i=0; i<nzB; i++) { 1805 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1806 else break; 1807 } 1808 imark = i; 1809 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1810 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1811 } 1812 if (idx) { 1813 *idx = idx_p = mat->rowindices; 1814 if (imark > -1) { 1815 for (i=0; i<imark; i++) { 1816 idx_p[i] = cmap[cworkB[i]]; 1817 } 1818 } else { 1819 for (i=0; i<nzB; i++) { 1820 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1821 else break; 1822 } 1823 imark = i; 1824 } 1825 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1826 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1827 } 1828 } else { 1829 if (idx) *idx = 0; 1830 if (v) *v = 0; 1831 } 1832 } 1833 *nz = nztot; 1834 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1835 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1836 PetscFunctionReturn(0); 1837 } 1838 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1841 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1842 { 1843 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1844 1845 PetscFunctionBegin; 1846 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1847 aij->getrowactive = PETSC_FALSE; 1848 PetscFunctionReturn(0); 1849 } 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatNorm_MPIAIJ" 1853 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1854 { 1855 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1856 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1857 PetscErrorCode ierr; 1858 PetscInt i,j,cstart = mat->cmap->rstart; 1859 PetscReal sum = 0.0; 1860 MatScalar *v; 1861 1862 PetscFunctionBegin; 1863 if (aij->size == 1) { 1864 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1865 } else { 1866 if (type == NORM_FROBENIUS) { 1867 v = amat->a; 1868 for (i=0; i<amat->nz; i++) { 1869 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1870 } 1871 v = bmat->a; 1872 for (i=0; i<bmat->nz; i++) { 1873 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1874 } 1875 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1876 *norm = PetscSqrtReal(*norm); 1877 } else if (type == NORM_1) { /* max column norm */ 1878 PetscReal *tmp,*tmp2; 1879 PetscInt *jj,*garray = aij->garray; 1880 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1881 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1882 *norm = 0.0; 1883 v = amat->a; jj = amat->j; 1884 for (j=0; j<amat->nz; j++) { 1885 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1886 } 1887 v = bmat->a; jj = bmat->j; 1888 for (j=0; j<bmat->nz; j++) { 1889 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1890 } 1891 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1892 for (j=0; j<mat->cmap->N; j++) { 1893 if (tmp2[j] > *norm) *norm = tmp2[j]; 1894 } 1895 ierr = PetscFree(tmp);CHKERRQ(ierr); 1896 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1897 } else if (type == NORM_INFINITY) { /* max row norm */ 1898 PetscReal ntemp = 0.0; 1899 for (j=0; j<aij->A->rmap->n; j++) { 1900 v = amat->a + amat->i[j]; 1901 sum = 0.0; 1902 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1903 sum += PetscAbsScalar(*v); v++; 1904 } 1905 v = bmat->a + bmat->i[j]; 1906 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1907 sum += PetscAbsScalar(*v); v++; 1908 } 1909 if (sum > ntemp) ntemp = sum; 1910 } 1911 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1912 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatTranspose_MPIAIJ" 1919 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1920 { 1921 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1922 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1923 PetscErrorCode ierr; 1924 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1925 PetscInt cstart = A->cmap->rstart,ncol; 1926 Mat B; 1927 MatScalar *array; 1928 1929 PetscFunctionBegin; 1930 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1931 1932 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1933 ai = Aloc->i; aj = Aloc->j; 1934 bi = Bloc->i; bj = Bloc->j; 1935 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1936 PetscInt *d_nnz,*g_nnz,*o_nnz; 1937 PetscSFNode *oloc; 1938 PETSC_UNUSED PetscSF sf; 1939 1940 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1941 /* compute d_nnz for preallocation */ 1942 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1943 for (i=0; i<ai[ma]; i++) { 1944 d_nnz[aj[i]]++; 1945 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1946 } 1947 /* compute local off-diagonal contributions */ 1948 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1949 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1950 /* map those to global */ 1951 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1952 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1953 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1954 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1955 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1956 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1957 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1958 1959 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1960 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1961 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 1962 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1964 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1965 } else { 1966 B = *matout; 1967 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1968 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1969 } 1970 1971 /* copy over the A part */ 1972 array = Aloc->a; 1973 row = A->rmap->rstart; 1974 for (i=0; i<ma; i++) { 1975 ncol = ai[i+1]-ai[i]; 1976 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1977 row++; 1978 array += ncol; aj += ncol; 1979 } 1980 aj = Aloc->j; 1981 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1982 1983 /* copy over the B part */ 1984 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1985 array = Bloc->a; 1986 row = A->rmap->rstart; 1987 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1988 cols_tmp = cols; 1989 for (i=0; i<mb; i++) { 1990 ncol = bi[i+1]-bi[i]; 1991 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1992 row++; 1993 array += ncol; cols_tmp += ncol; 1994 } 1995 ierr = PetscFree(cols);CHKERRQ(ierr); 1996 1997 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1998 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1999 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2000 *matout = B; 2001 } else { 2002 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2003 } 2004 PetscFunctionReturn(0); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2009 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2010 { 2011 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2012 Mat a = aij->A,b = aij->B; 2013 PetscErrorCode ierr; 2014 PetscInt s1,s2,s3; 2015 2016 PetscFunctionBegin; 2017 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2018 if (rr) { 2019 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2020 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2021 /* Overlap communication with computation. 
*/ 2022 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2023 } 2024 if (ll) { 2025 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2026 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2027 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2028 } 2029 /* scale the diagonal block */ 2030 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2031 2032 if (rr) { 2033 /* Do a scatter end and then right scale the off-diagonal block */ 2034 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2035 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2036 } 2037 PetscFunctionReturn(0); 2038 } 2039 2040 #undef __FUNCT__ 2041 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2042 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2043 { 2044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2045 PetscErrorCode ierr; 2046 2047 PetscFunctionBegin; 2048 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2049 PetscFunctionReturn(0); 2050 } 2051 2052 #undef __FUNCT__ 2053 #define __FUNCT__ "MatEqual_MPIAIJ" 2054 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2055 { 2056 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2057 Mat a,b,c,d; 2058 PetscBool flg; 2059 PetscErrorCode ierr; 2060 2061 PetscFunctionBegin; 2062 a = matA->A; b = matA->B; 2063 c = matB->A; d = matB->B; 2064 2065 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2066 if (flg) { 2067 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2068 } 2069 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2070 PetscFunctionReturn(0); 2071 } 2072 2073 #undef __FUNCT__ 2074 #define __FUNCT__ "MatCopy_MPIAIJ" 2075 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2076 { 2077 PetscErrorCode ierr; 2078 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2079 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2080 2081 PetscFunctionBegin; 2082 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2083 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2084 /* because of the column compression in the off-processor part of the matrix a->B, 2085 the number of columns in a->B and b->B may be different, hence we cannot call 2086 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2087 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2088 then copying the submatrices */ 2089 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2090 } else { 2091 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2092 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2093 } 2094 PetscFunctionReturn(0); 2095 } 2096 2097 #undef __FUNCT__ 2098 #define __FUNCT__ "MatSetUp_MPIAIJ" 2099 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2100 { 2101 PetscErrorCode ierr; 2102 2103 PetscFunctionBegin; 2104 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2110 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2111 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2112 { 2113 PetscInt i,m=Y->rmap->N; 2114 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2115 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2116 const PetscInt *xi = x->i,*yi = y->i; 2117 2118 PetscFunctionBegin; 2119 /* Set the number of nonzeros in the new matrix */ 2120 for (i=0; i<m; i++) { 2121 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2122 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2123 nnz[i] = 0; 2124 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2125 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2126 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2127 nnz[i]++; 2128 } 2129 for (; k<nzy; k++) nnz[i]++; 2130 } 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatAXPY_MPIAIJ" 2136 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2137 { 2138 PetscErrorCode ierr; 2139 PetscInt i; 2140 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2141 PetscBLASInt bnz,one=1; 2142 Mat_SeqAIJ *x,*y; 2143 2144 PetscFunctionBegin; 2145 if (str == SAME_NONZERO_PATTERN) { 2146 PetscScalar alpha = a; 2147 x = (Mat_SeqAIJ*)xx->A->data; 2148 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2149 y = (Mat_SeqAIJ*)yy->A->data; 2150 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2151 x = (Mat_SeqAIJ*)xx->B->data; 2152 y = (Mat_SeqAIJ*)yy->B->data; 2153 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2154 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2155 } else if (str == SUBSET_NONZERO_PATTERN) { 2156 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2157 2158 x = (Mat_SeqAIJ*)xx->B->data; 2159 y = (Mat_SeqAIJ*)yy->B->data; 2160 if (y->xtoy && y->XtoY != xx->B) { 2161 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2162 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2163 } 2164 if (!y->xtoy) { /* get xtoy */ 2165 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2166 y->XtoY = xx->B; 2167 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2168 } 2169 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2170 } else { 2171 Mat B; 2172 PetscInt *nnz_d,*nnz_o; 2173 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2174 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2175 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2176 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2177 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2178 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2179 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2180 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2181 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2182 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2183 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2184 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2185 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2186 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2187 } 2188 PetscFunctionReturn(0); 2189 } 2190 2191 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2192 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "MatConjugate_MPIAIJ" 2195 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2196 { 2197 #if defined(PETSC_USE_COMPLEX) 2198 PetscErrorCode ierr; 2199 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2200 2201 PetscFunctionBegin; 2202 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2203 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2204 #else 2205 PetscFunctionBegin; 2206 #endif 2207 PetscFunctionReturn(0); 2208 } 2209 2210 #undef __FUNCT__ 2211 #define __FUNCT__ "MatRealPart_MPIAIJ" 2212 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2213 { 2214 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2215 PetscErrorCode ierr; 2216 2217 PetscFunctionBegin; 2218 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2219 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2220 PetscFunctionReturn(0); 2221 } 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2225 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2226 { 2227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2228 PetscErrorCode ierr; 2229 2230 PetscFunctionBegin; 2231 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2232 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2233 PetscFunctionReturn(0); 2234 } 2235 2236 #if defined(PETSC_HAVE_PBGL) 2237 2238 #include <boost/parallel/mpi/bsp_process_group.hpp> 2239 #include <boost/graph/distributed/ilu_default_graph.hpp> 2240 #include <boost/graph/distributed/ilu_0_block.hpp> 2241 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2242 #include <boost/graph/distributed/petsc/interface.hpp> 2243 #include <boost/multi_array.hpp> 2244 #include <boost/parallel/distributed_property_map.hpp> 2245 2246 #undef __FUNCT__ 2247 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2248 /* 2249 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2250 */ 2251 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2252 { 2253 namespace petsc = boost::distributed::petsc; 2254 2255 namespace graph_dist = boost::graph::distributed; 2256 using boost::graph::distributed::ilu_default::process_group_type; 2257 using boost::graph::ilu_permuted; 2258 2259 PetscBool row_identity, col_identity; 2260 PetscContainer c; 2261 PetscInt m, n, M, N; 2262 PetscErrorCode ierr; 2263 2264 PetscFunctionBegin; 2265 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2266 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2267 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2268 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2269 2270 process_group_type pg; 2271 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2272 lgraph_type
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2273 lgraph_type& level_graph = *lgraph_p; 2274 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2275 2276 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2277 ilu_permuted(level_graph); 2278 2279 /* put together the new matrix */ 2280 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2281 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2282 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2283 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2284 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2285 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2286 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2287 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2288 2289 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2290 ierr = PetscContainerSetPointer(c, lgraph_p); 2291 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2292 ierr = PetscContainerDestroy(&c); 2293 PetscFunctionReturn(0); 2294 } 2295 2296 #undef __FUNCT__ 2297 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2298 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2299 { 2300 PetscFunctionBegin; 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatSolve_MPIAIJ" 2306 /* 2307 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2308 */ 2309 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2310 { 2311 namespace graph_dist = boost::graph::distributed; 2312 2313 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2314 lgraph_type *lgraph_p; 2315 PetscContainer c; 2316 PetscErrorCode ierr; 2317 2318 PetscFunctionBegin; 2319 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2320 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2321 ierr = VecCopy(b, x);CHKERRQ(ierr); 2322 2323 PetscScalar *array_x; 2324 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2325 PetscInt sx; 2326 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2327 2328 PetscScalar *array_b; 2329 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2330 PetscInt sb; 2331 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2332 2333 lgraph_type& level_graph = *lgraph_p; 2334 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2335 2336 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2337 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2338 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2339 2340 typedef boost::iterator_property_map<array_ref_type::iterator, 2341 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2342 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2343 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2344 2345 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2346 PetscFunctionReturn(0); 2347 } 2348 #endif 2349 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatDestroy_MatRedundant" 2352 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2353 { 2354 PetscErrorCode ierr; 2355 Mat_Redundant *redund; 2356 PetscInt i; 2357 PetscMPIInt size; 2358 2359 PetscFunctionBegin; 2360 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2361 if (size == 1) { 2362 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2363 redund = 
a->redundant; 2364 } else { 2365 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2366 redund = a->redundant; 2367 } 2368 if (redund){ 2369 if (redund->matseq) { /* via MatGetSubMatrices() */ 2370 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2371 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2372 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2373 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2374 } else { 2375 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2376 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2377 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2378 for (i=0; i<redund->nrecvs; i++) { 2379 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2380 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2381 } 2382 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2383 } 2384 2385 if (redund->psubcomm) { 2386 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2387 } 2388 ierr = redund->Destroy(A);CHKERRQ(ierr); 2389 ierr = PetscFree(redund);CHKERRQ(ierr); 2390 } 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2396 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2397 { 2398 PetscMPIInt rank,size; 2399 MPI_Comm comm; 2400 PetscErrorCode ierr; 2401 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2402 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2403 PetscInt *rowrange = mat->rmap->range; 2404 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2405 Mat A = aij->A,B=aij->B,C=*matredundant; 2406 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2407 PetscScalar *sbuf_a; 2408 PetscInt nzlocal=a->nz+b->nz; 2409 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2410 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2411 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2412 MatScalar *aworkA,*aworkB; 2413 PetscScalar *vals; 2414 PetscMPIInt tag1,tag2,tag3,imdex; 2415 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2416 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2417 MPI_Status recv_status,*send_status; 2418 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2419 PetscInt **rbuf_j=NULL; 2420 PetscScalar **rbuf_a=NULL; 2421 Mat_Redundant *redund =NULL; 2422 2423 PetscFunctionBegin; 2424 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2425 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2426 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2427 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2428 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2429 2430 if (reuse == MAT_REUSE_MATRIX) { 2431 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2432 if (subsize == 1) { 2433 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2434 redund = c->redundant; 2435 } else { 2436 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2437 redund = c->redundant; 2438 } 2439 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2440 2441 nsends = redund->nsends; 2442 nrecvs = redund->nrecvs; 2443 send_rank = redund->send_rank; 2444 recv_rank = redund->recv_rank; 2445 sbuf_nz = redund->sbuf_nz; 2446 rbuf_nz = redund->rbuf_nz; 2447 sbuf_j = redund->sbuf_j; 2448 sbuf_a = redund->sbuf_a; 2449 rbuf_j = redund->rbuf_j; 2450 rbuf_a = redund->rbuf_a; 2451 } 2452 2453 if (reuse == MAT_INITIAL_MATRIX) { 2454 PetscInt nleftover,np_subcomm; 2455 2456 /* get the destination processors' id send_rank, nsends and nrecvs */ 2457 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2458 2459 np_subcomm = size/nsubcomm; 2460 nleftover = size - nsubcomm*np_subcomm; 2461 2462 /* block of codes below is specific for INTERLACED */ 2463 /* ------------------------------------------------*/ 2464 nsends = 0; nrecvs = 0; 2465 for (i=0; i<size; i++) { 2466 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2467 send_rank[nsends++] = i; 2468 recv_rank[nrecvs++] = i; 2469 } 2470 } 2471 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2472 i = size-nleftover-1; 2473 j = 0; 2474 while (j < nsubcomm - nleftover) { 2475 send_rank[nsends++] = i; 2476 i--; j++; 2477 } 2478 } 2479 2480 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2481 for (i=0; i<nleftover; i++) { 2482 recv_rank[nrecvs++] = size-nleftover+i; 2483 } 2484 } 2485 /*----------------------------------------------*/ 2486 2487 /* allocate sbuf_j, sbuf_a */ 2488 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2489 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2490 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2491 /* 2492 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2493 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2494 */ 2495 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2496 2497 /* copy mat's local entries into the buffers */ 2498 if (reuse == MAT_INITIAL_MATRIX) { 2499 rownz_max = 0; 2500 rptr = sbuf_j; 2501 cols = sbuf_j + rend-rstart + 1; 2502 vals = sbuf_a; 2503 rptr[0] = 0; 2504 for (i=0; i<rend-rstart; i++) { 2505 row = i + rstart; 2506 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2507 ncols = nzA + nzB; 2508 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2509 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2510 /* load the column indices for this row into cols */ 2511 lwrite = 0; 2512 for (l=0; l<nzB; l++) { 2513 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2514 vals[lwrite] = aworkB[l]; 2515 cols[lwrite++] = ctmp; 2516 } 2517 } 2518 for (l=0; l<nzA; l++) { 2519 vals[lwrite] = aworkA[l]; 2520 cols[lwrite++] = cstart + cworkA[l]; 2521 } 2522 for (l=0; l<nzB; l++) { 2523 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2524 vals[lwrite] = aworkB[l]; 2525 cols[lwrite++] = ctmp; 2526 } 2527 } 2528 vals += ncols; 2529 cols += ncols; 2530 rptr[i+1] = rptr[i] + ncols; 2531 if (rownz_max < ncols) rownz_max = ncols; 2532 } 2533 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2534 } else { /* only copy matrix values into sbuf_a */ 2535 rptr = sbuf_j; 2536 vals = sbuf_a; 2537 rptr[0] = 0; 2538 for (i=0; i<rend-rstart; i++) { 2539 row = i + rstart; 2540 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2541 ncols = nzA + nzB; 2542 cworkB = b->j + b->i[i]; 2543 aworkA = a->a + a->i[i]; 2544 aworkB = b->a + b->i[i]; 2545 lwrite = 0; 2546 for (l=0; 
l<nzB; l++) { 2547 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2548 } 2549 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2550 for (l=0; l<nzB; l++) { 2551 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2552 } 2553 vals += ncols; 2554 rptr[i+1] = rptr[i] + ncols; 2555 } 2556 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2557 2558 /* send nzlocal to others, and recv other's nzlocal */ 2559 /*--------------------------------------------------*/ 2560 if (reuse == MAT_INITIAL_MATRIX) { 2561 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2562 2563 s_waits2 = s_waits3 + nsends; 2564 s_waits1 = s_waits2 + nsends; 2565 r_waits1 = s_waits1 + nsends; 2566 r_waits2 = r_waits1 + nrecvs; 2567 r_waits3 = r_waits2 + nrecvs; 2568 } else { 2569 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2570 2571 r_waits3 = s_waits3 + nsends; 2572 } 2573 2574 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2575 if (reuse == MAT_INITIAL_MATRIX) { 2576 /* get new tags to keep the communication clean */ 2577 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2578 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2579 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2580 2581 /* post receives of other's nzlocal */ 2582 for (i=0; i<nrecvs; i++) { 2583 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2584 } 2585 /* send nzlocal to others */ 2586 for (i=0; i<nsends; i++) { 2587 sbuf_nz[i] = nzlocal; 2588 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2589 } 2590 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2591 count = nrecvs; 2592 while (count) { 2593 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2594 2595 recv_rank[imdex] = recv_status.MPI_SOURCE; 2596 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2597 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2598 2599 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2600 2601 rbuf_nz[imdex] += i + 2; 2602 2603 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2604 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2605 count--; 2606 } 2607 /* wait on sends of nzlocal */ 2608 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2609 /* send mat->i,j to others, and recv from other's */ 2610 /*------------------------------------------------*/ 2611 for (i=0; i<nsends; i++) { 2612 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2613 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2614 } 2615 /* wait on receives of mat->i,j */ 2616 /*------------------------------*/ 2617 count = nrecvs; 2618 while (count) { 2619 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2620 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2621 count--; 2622 } 2623 /* wait on sends of mat->i,j */ 2624 /*---------------------------*/ 2625 if (nsends) { 2626 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2627 } 2628 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2629 2630 /* post 
receives, send and receive mat->a */ 2631 /*----------------------------------------*/ 2632 for (imdex=0; imdex<nrecvs; imdex++) { 2633 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2634 } 2635 for (i=0; i<nsends; i++) { 2636 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2637 } 2638 count = nrecvs; 2639 while (count) { 2640 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2641 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2642 count--; 2643 } 2644 if (nsends) { 2645 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2646 } 2647 2648 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2649 2650 /* create redundant matrix */ 2651 /*-------------------------*/ 2652 if (reuse == MAT_INITIAL_MATRIX) { 2653 const PetscInt *range; 2654 PetscInt rstart_sub,rend_sub,mloc_sub; 2655 2656 /* compute rownz_max for preallocation */ 2657 for (imdex=0; imdex<nrecvs; imdex++) { 2658 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2659 rptr = rbuf_j[imdex]; 2660 for (i=0; i<j; i++) { 2661 ncols = rptr[i+1] - rptr[i]; 2662 if (rownz_max < ncols) rownz_max = ncols; 2663 } 2664 } 2665 2666 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2667 2668 /* get local size of redundant matrix 2669 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2670 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2671 rstart_sub = range[nsubcomm*subrank]; 2672 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2673 rend_sub = range[nsubcomm*(subrank+1)]; 2674 } else { 2675 rend_sub = mat->rmap->N; 2676 } 2677 mloc_sub = rend_sub - rstart_sub; 2678 2679 if (M == N) { 2680 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2681 } else { /* non-square matrix */ 2682 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2683 } 2684 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2685 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2686 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2687 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2688 } else { 2689 C = *matredundant; 2690 } 2691 2692 /* insert local matrix entries */ 2693 rptr = sbuf_j; 2694 cols = sbuf_j + rend-rstart + 1; 2695 vals = sbuf_a; 2696 for (i=0; i<rend-rstart; i++) { 2697 row = i + rstart; 2698 ncols = rptr[i+1] - rptr[i]; 2699 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2700 vals += ncols; 2701 cols += ncols; 2702 } 2703 /* insert received matrix entries */ 2704 for (imdex=0; imdex<nrecvs; imdex++) { 2705 rstart = rowrange[recv_rank[imdex]]; 2706 rend = rowrange[recv_rank[imdex]+1]; 2707 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2708 rptr = rbuf_j[imdex]; 2709 cols = rbuf_j[imdex] + rend-rstart + 1; 2710 vals = rbuf_a[imdex]; 2711 for (i=0; i<rend-rstart; i++) { 2712 row = i + rstart; 2713 ncols = rptr[i+1] - rptr[i]; 2714 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2715 vals += ncols; 2716 cols += ncols; 2717 } 2718 } 2719 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2720 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2721 2722 if (reuse == MAT_INITIAL_MATRIX) { 2723 *matredundant = C; 2724 2725 /* 
create a supporting struct and attach it to C for reuse */ 2726 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2727 if (subsize == 1) { 2728 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2729 c->redundant = redund; 2730 } else { 2731 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2732 c->redundant = redund; 2733 } 2734 2735 redund->nzlocal = nzlocal; 2736 redund->nsends = nsends; 2737 redund->nrecvs = nrecvs; 2738 redund->send_rank = send_rank; 2739 redund->recv_rank = recv_rank; 2740 redund->sbuf_nz = sbuf_nz; 2741 redund->rbuf_nz = rbuf_nz; 2742 redund->sbuf_j = sbuf_j; 2743 redund->sbuf_a = sbuf_a; 2744 redund->rbuf_j = rbuf_j; 2745 redund->rbuf_a = rbuf_a; 2746 redund->psubcomm = NULL; 2747 2748 redund->Destroy = C->ops->destroy; 2749 C->ops->destroy = MatDestroy_MatRedundant; 2750 } 2751 PetscFunctionReturn(0); 2752 } 2753 2754 #undef __FUNCT__ 2755 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2756 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2757 { 2758 PetscErrorCode ierr; 2759 MPI_Comm comm; 2760 PetscMPIInt size,subsize; 2761 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2762 Mat_Redundant *redund=NULL; 2763 PetscSubcomm psubcomm=NULL; 2764 MPI_Comm subcomm_in=subcomm; 2765 Mat *matseq; 2766 IS isrow,iscol; 2767 2768 PetscFunctionBegin; 2769 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2770 if (reuse == MAT_INITIAL_MATRIX) { 2771 /* create psubcomm, then get subcomm */ 2772 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2773 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2774 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2775 2776 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2777 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2778 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2779 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2780 subcomm = psubcomm->comm; 2781 } else { /* retrieve psubcomm and subcomm */ 2782 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2783 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2784 if (subsize == 1) { 2785 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2786 redund = c->redundant; 2787 } else { 2788 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2789 redund = c->redundant; 2790 } 2791 psubcomm = redund->psubcomm; 2792 } 2793 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2794 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2795 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2796 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2797 if (subsize == 1) { 2798 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2799 c->redundant->psubcomm = psubcomm; 2800 } else { 2801 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2802 c->redundant->psubcomm = psubcomm ; 2803 } 2804 } 2805 PetscFunctionReturn(0); 2806 } 2807 } 2808 2809 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2810 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2811 if (reuse == MAT_INITIAL_MATRIX) { 2812 /* create a local sequential matrix matseq[0] */ 2813 mloc_sub = PETSC_DECIDE; 2814 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2815 ierr = 
MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2816 rstart = rend - mloc_sub; 2817 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2818 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2819 } else { /* reuse == MAT_REUSE_MATRIX */ 2820 if (subsize == 1) { 2821 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2822 redund = c->redundant; 2823 } else { 2824 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2825 redund = c->redundant; 2826 } 2827 2828 isrow = redund->isrow; 2829 iscol = redund->iscol; 2830 matseq = redund->matseq; 2831 } 2832 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2833 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2834 2835 if (reuse == MAT_INITIAL_MATRIX) { 2836 /* create a supporting struct and attach it to C for reuse */ 2837 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2838 if (subsize == 1) { 2839 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2840 c->redundant = redund; 2841 } else { 2842 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2843 c->redundant = redund; 2844 } 2845 redund->isrow = isrow; 2846 redund->iscol = iscol; 2847 redund->matseq = matseq; 2848 redund->psubcomm = psubcomm; 2849 redund->Destroy = (*matredundant)->ops->destroy; 2850 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2851 } 2852 PetscFunctionReturn(0); 2853 } 2854 2855 #undef __FUNCT__ 2856 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2857 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2858 { 2859 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2860 PetscErrorCode ierr; 2861 PetscInt i,*idxb = 0; 2862 PetscScalar *va,*vb; 2863 Vec vtmp; 2864 2865 PetscFunctionBegin; 2866 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2867 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2868 if (idx) { 2869 for (i=0; i<A->rmap->n; i++) { 2870 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2871 } 2872 } 2873 2874 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2875 if (idx) { 2876 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2877 } 2878 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2879 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2880 2881 for (i=0; i<A->rmap->n; i++) { 2882 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2883 va[i] = vb[i]; 2884 if (idx) idx[i] = a->garray[idxb[i]]; 2885 } 2886 } 2887 2888 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2889 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2890 ierr = PetscFree(idxb);CHKERRQ(ierr); 2891 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2892 PetscFunctionReturn(0); 2893 } 2894 2895 #undef __FUNCT__ 2896 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2897 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2898 { 2899 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2900 PetscErrorCode ierr; 2901 PetscInt i,*idxb = 0; 2902 PetscScalar *va,*vb; 2903 Vec vtmp; 2904 2905 PetscFunctionBegin; 2906 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2907 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2908 if (idx) { 2909 for (i=0; i<A->rmap->n; i++) { 2910 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2911 } 2912 } 2913 2914 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2915 if (idx) { 2916 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2917 } 2918 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2919 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2920 2921 for (i=0; i<A->rmap->n; i++)
{
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  /* the work vectors are purely local, so create them on PETSC_COMM_SELF (as MatGetRowMax_MPIAIJ does) */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
PetscFunctionReturn(0); 3015 } 3016 3017 #undef __FUNCT__ 3018 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3019 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3020 { 3021 PetscErrorCode ierr; 3022 Mat *dummy; 3023 3024 PetscFunctionBegin; 3025 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3026 *newmat = *dummy; 3027 ierr = PetscFree(dummy);CHKERRQ(ierr); 3028 PetscFunctionReturn(0); 3029 } 3030 3031 #undef __FUNCT__ 3032 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3033 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3034 { 3035 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3036 PetscErrorCode ierr; 3037 3038 PetscFunctionBegin; 3039 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3040 PetscFunctionReturn(0); 3041 } 3042 3043 #undef __FUNCT__ 3044 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3045 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3046 { 3047 PetscErrorCode ierr; 3048 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3049 3050 PetscFunctionBegin; 3051 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3052 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3053 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3054 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3055 PetscFunctionReturn(0); 3056 } 3057 3058 /* -------------------------------------------------------------------*/ 3059 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3060 MatGetRow_MPIAIJ, 3061 MatRestoreRow_MPIAIJ, 3062 MatMult_MPIAIJ, 3063 /* 4*/ MatMultAdd_MPIAIJ, 3064 MatMultTranspose_MPIAIJ, 3065 MatMultTransposeAdd_MPIAIJ, 3066 #if defined(PETSC_HAVE_PBGL) 3067 MatSolve_MPIAIJ, 3068 #else 3069 0, 3070 #endif 3071 0, 3072 0, 3073 /*10*/ 0, 3074 0, 3075 0, 3076 MatSOR_MPIAIJ, 3077 MatTranspose_MPIAIJ, 3078 /*15*/ MatGetInfo_MPIAIJ, 3079 MatEqual_MPIAIJ, 3080 MatGetDiagonal_MPIAIJ, 3081 MatDiagonalScale_MPIAIJ, 3082 MatNorm_MPIAIJ, 3083 /*20*/ MatAssemblyBegin_MPIAIJ, 3084 MatAssemblyEnd_MPIAIJ, 3085 MatSetOption_MPIAIJ, 3086 MatZeroEntries_MPIAIJ, 3087 /*24*/ MatZeroRows_MPIAIJ, 3088 0, 3089 #if defined(PETSC_HAVE_PBGL) 3090 0, 3091 #else 3092 0, 3093 #endif 3094 0, 3095 0, 3096 /*29*/ MatSetUp_MPIAIJ, 3097 #if defined(PETSC_HAVE_PBGL) 3098 0, 3099 #else 3100 0, 3101 #endif 3102 0, 3103 0, 3104 0, 3105 /*34*/ MatDuplicate_MPIAIJ, 3106 0, 3107 0, 3108 0, 3109 0, 3110 /*39*/ MatAXPY_MPIAIJ, 3111 MatGetSubMatrices_MPIAIJ, 3112 MatIncreaseOverlap_MPIAIJ, 3113 MatGetValues_MPIAIJ, 3114 MatCopy_MPIAIJ, 3115 /*44*/ MatGetRowMax_MPIAIJ, 3116 MatScale_MPIAIJ, 3117 0, 3118 0, 3119 MatZeroRowsColumns_MPIAIJ, 3120 /*49*/ MatSetRandom_MPIAIJ, 3121 0, 3122 0, 3123 0, 3124 0, 3125 /*54*/ MatFDColoringCreate_MPIXAIJ, 3126 0, 3127 MatSetUnfactored_MPIAIJ, 3128 MatPermute_MPIAIJ, 3129 0, 3130 /*59*/ MatGetSubMatrix_MPIAIJ, 3131 MatDestroy_MPIAIJ, 3132 MatView_MPIAIJ, 3133 0, 3134 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3135 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3136 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3137 0, 3138 0, 3139 0, 3140 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3141 MatGetRowMinAbs_MPIAIJ, 3142 0, 3143 MatSetColoring_MPIAIJ, 3144 0, 3145 MatSetValuesAdifor_MPIAIJ, 3146 /*75*/ MatFDColoringApply_AIJ, 3147 0, 3148 0, 3149 0, 3150 MatFindZeroDiagonals_MPIAIJ, 3151 /*80*/ 0, 3152 0, 3153 0, 3154 /*83*/ MatLoad_MPIAIJ, 3155 0, 3156 0, 3157 0, 3158 0, 3159 0, 3160 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3161 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3162 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3163 MatPtAP_MPIAIJ_MPIAIJ, 3164 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3165 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3166 0, 3167 0, 3168 0, 3169 0, 3170 /*99*/ 0, 3171 0, 3172 0, 3173 MatConjugate_MPIAIJ, 3174 0, 3175 /*104*/MatSetValuesRow_MPIAIJ, 3176 MatRealPart_MPIAIJ, 3177 MatImaginaryPart_MPIAIJ, 3178 0, 3179 0, 3180 /*109*/0, 3181 MatGetRedundantMatrix_MPIAIJ, 3182 MatGetRowMin_MPIAIJ, 3183 0, 3184 0, 3185 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3186 0, 3187 0, 3188 0, 3189 0, 3190 /*119*/0, 3191 0, 3192 0, 3193 0, 3194 MatGetMultiProcBlock_MPIAIJ, 3195 /*124*/MatFindNonzeroRows_MPIAIJ, 3196 MatGetColumnNorms_MPIAIJ, 3197 MatInvertBlockDiagonal_MPIAIJ, 3198 0, 3199 MatGetSubMatricesParallel_MPIAIJ, 3200 /*129*/0, 3201 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3202 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3203 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3204 0, 3205 /*134*/0, 3206 0, 3207 0, 3208 0, 3209 0, 3210 /*139*/0, 3211 0, 3212 0, 3213 MatFDColoringSetUp_MPIXAIJ 3214 }; 3215 3216 /* ----------------------------------------------------------------------------------------*/ 3217 3218 #undef __FUNCT__ 3219 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3220 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3221 { 3222 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3223 PetscErrorCode ierr; 3224 3225 PetscFunctionBegin; 3226 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3227 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3228 PetscFunctionReturn(0); 3229 } 3230 3231 #undef __FUNCT__ 3232 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3233 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3234 { 3235 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3236 PetscErrorCode ierr; 3237 3238 PetscFunctionBegin; 3239 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3240 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3241 PetscFunctionReturn(0); 3242 } 3243 3244 #undef __FUNCT__ 3245 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3246 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3247 { 3248 Mat_MPIAIJ *b; 3249 PetscErrorCode ierr; 3250 3251 PetscFunctionBegin; 3252 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3253 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3254 b = (Mat_MPIAIJ*)B->data; 3255 3256 if (!B->preallocated) { 3257 /* Explicitly create 2 MATSEQAIJ matrices. 
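         b->A holds the "diagonal" block and is sized with the local column count, while
         b->B holds the "off-diagonal" block and is created here with the full global
         column width; B's columns are compacted to just the referenced ghost columns
         later, when the matrix is assembled.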
*/ 3258 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3259 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3260 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3261 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3262 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3263 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3264 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3265 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3266 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3267 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3268 } 3269 3270 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3271 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3272 B->preallocated = PETSC_TRUE; 3273 PetscFunctionReturn(0); 3274 } 3275 3276 #undef __FUNCT__ 3277 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3278 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3279 { 3280 Mat mat; 3281 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3282 PetscErrorCode ierr; 3283 3284 PetscFunctionBegin; 3285 *newmat = 0; 3286 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3287 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3288 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3289 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3290 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3291 a = (Mat_MPIAIJ*)mat->data; 3292 3293 mat->factortype = matin->factortype; 3294 mat->rmap->bs = matin->rmap->bs; 3295 mat->cmap->bs = matin->cmap->bs; 3296 mat->assembled = PETSC_TRUE; 3297 mat->insertmode = NOT_SET_VALUES; 3298 mat->preallocated = PETSC_TRUE; 3299 3300 a->size = oldmat->size; 3301 a->rank = oldmat->rank; 3302 a->donotstash = oldmat->donotstash; 3303 a->roworiented = oldmat->roworiented; 3304 a->rowindices = 0; 3305 a->rowvalues = 0; 3306 a->getrowactive = PETSC_FALSE; 3307 3308 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3309 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3310 3311 if (oldmat->colmap) { 3312 #if defined(PETSC_USE_CTABLE) 3313 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3314 #else 3315 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3316 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3317 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3318 #endif 3319 } else a->colmap = 0; 3320 if (oldmat->garray) { 3321 PetscInt len; 3322 len = oldmat->B->cmap->n; 3323 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3324 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3325 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3326 } else a->garray = 0; 3327 3328 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3329 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3330 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3331 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3332 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3333 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3334 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3335 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3336 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3337 *newmat = mat; 3338 PetscFunctionReturn(0); 3339 } 3340 3341 3342 3343 #undef __FUNCT__ 3344 #define __FUNCT__ "MatLoad_MPIAIJ" 3345 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3346 { 3347 PetscScalar *vals,*svals; 3348 MPI_Comm comm; 3349 PetscErrorCode ierr; 3350 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3351 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3352 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3353 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3354 PetscInt cend,cstart,n,*rowners,sizesset=1; 3355 int fd; 3356 PetscInt bs = 1; 3357 3358 PetscFunctionBegin; 3359 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3360 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3361 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3362 if (!rank) { 3363 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3364 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3365 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3366 } 3367 3368 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3369 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3370 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3371 3372 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3373 3374 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3375 M = header[1]; N = header[2]; 3376 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3377 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3378 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3379 3380 /* If global sizes are set, check if they are consistent with that given in the file */ 3381 if (sizesset) { 3382 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3383 } 3384 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3385 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3386 3387 /* determine ownership of all (block) rows */ 3388 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3389 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3390 else m = newMat->rmap->n; /* Set by user */ 3391 3392 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3393 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3394 3395 /* First process needs enough room for process with most rows */ 3396 if (!rank) { 3397 mmax = rowners[1]; 3398 for (i=2; i<=size; i++) { 3399 mmax = PetscMax(mmax, rowners[i]); 3400 } 3401 } else mmax = -1; /* unused, but compilers complain */ 3402 3403 rowners[0] = 0; 3404 for (i=2; i<=size; i++) { 3405 rowners[i] += rowners[i-1]; 3406 } 3407 
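  /* rowners[] now contains the global row offsets: rowners[p] is the first global row
     owned by process p (prefix sum of the gathered local row counts), so this process
     owns rows [rowners[rank], rowners[rank+1]). */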
rstart = rowners[rank]; 3408 rend = rowners[rank+1]; 3409 3410 /* distribute row lengths to all processors */ 3411 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3412 if (!rank) { 3413 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3414 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3415 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3416 for (j=0; j<m; j++) { 3417 procsnz[0] += ourlens[j]; 3418 } 3419 for (i=1; i<size; i++) { 3420 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3421 /* calculate the number of nonzeros on each processor */ 3422 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3423 procsnz[i] += rowlengths[j]; 3424 } 3425 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3426 } 3427 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3428 } else { 3429 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3430 } 3431 3432 if (!rank) { 3433 /* determine max buffer needed and allocate it */ 3434 maxnz = 0; 3435 for (i=0; i<size; i++) { 3436 maxnz = PetscMax(maxnz,procsnz[i]); 3437 } 3438 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3439 3440 /* read in my part of the matrix column indices */ 3441 nz = procsnz[0]; 3442 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3443 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3444 3445 /* read in every one elses and ship off */ 3446 for (i=1; i<size; i++) { 3447 nz = procsnz[i]; 3448 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3449 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3450 } 3451 ierr = PetscFree(cols);CHKERRQ(ierr); 3452 } else { 3453 /* determine buffer space needed for message */ 3454 nz = 0; 3455 for (i=0; i<m; i++) { 3456 nz += ourlens[i]; 3457 } 3458 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3459 3460 /* receive message of column indices*/ 3461 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3462 } 3463 3464 /* determine column ownership if matrix is not square */ 3465 if (N != M) { 3466 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3467 else n = newMat->cmap->n; 3468 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3469 cstart = cend - n; 3470 } else { 3471 cstart = rstart; 3472 cend = rend; 3473 n = cend - cstart; 3474 } 3475 3476 /* loop over local rows, determining number of off diagonal entries */ 3477 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3478 jj = 0; 3479 for (i=0; i<m; i++) { 3480 for (j=0; j<ourlens[i]; j++) { 3481 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3482 jj++; 3483 } 3484 } 3485 3486 for (i=0; i<m; i++) { 3487 ourlens[i] -= offlens[i]; 3488 } 3489 if (!sizesset) { 3490 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3491 } 3492 3493 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3494 3495 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3496 3497 for (i=0; i<m; i++) { 3498 ourlens[i] += offlens[i]; 3499 } 3500 3501 if (!rank) { 3502 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3503 3504 /* read in my part of the matrix numerical values */ 3505 nz = procsnz[0]; 3506 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3507 3508 /* insert into matrix */ 3509 jj = rstart; 3510 smycols = mycols; 3511 svals = vals; 3512 for (i=0; i<m; i++) { 3513 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3514 smycols += ourlens[i]; 3515 svals += 
ourlens[i]; 3516 jj++; 3517 } 3518 3519 /* read in other processors and ship out */ 3520 for (i=1; i<size; i++) { 3521 nz = procsnz[i]; 3522 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3523 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3524 } 3525 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3526 } else { 3527 /* receive numeric values */ 3528 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3529 3530 /* receive message of values*/ 3531 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3532 3533 /* insert into matrix */ 3534 jj = rstart; 3535 smycols = mycols; 3536 svals = vals; 3537 for (i=0; i<m; i++) { 3538 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3539 smycols += ourlens[i]; 3540 svals += ourlens[i]; 3541 jj++; 3542 } 3543 } 3544 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3545 ierr = PetscFree(vals);CHKERRQ(ierr); 3546 ierr = PetscFree(mycols);CHKERRQ(ierr); 3547 ierr = PetscFree(rowners);CHKERRQ(ierr); 3548 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3549 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3550 PetscFunctionReturn(0); 3551 } 3552 3553 #undef __FUNCT__ 3554 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3555 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3556 { 3557 PetscErrorCode ierr; 3558 IS iscol_local; 3559 PetscInt csize; 3560 3561 PetscFunctionBegin; 3562 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3563 if (call == MAT_REUSE_MATRIX) { 3564 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3565 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3566 } else { 3567 PetscInt cbs; 3568 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3569 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3570 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3571 } 3572 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3573 if (call == MAT_INITIAL_MATRIX) { 3574 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3575 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3576 } 3577 PetscFunctionReturn(0); 3578 } 3579 3580 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3581 #undef __FUNCT__ 3582 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3583 /* 3584 Not great since it makes two copies of the submatrix, first an SeqAIJ 3585 in local and then by concatenating the local matrices the end result. 3586 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3587 3588 Note: This requires a sequential iscol with all indices. 
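      (The caller, MatGetSubMatrix_MPIAIJ() above, builds such an iscol with ISAllGather()
       and caches it on the new matrix under the key "ISAllGather" so that MAT_REUSE_MATRIX
       calls can find it again.)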
3589 */ 3590 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3591 { 3592 PetscErrorCode ierr; 3593 PetscMPIInt rank,size; 3594 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3595 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3596 PetscBool allcolumns, colflag; 3597 Mat M,Mreuse; 3598 MatScalar *vwork,*aa; 3599 MPI_Comm comm; 3600 Mat_SeqAIJ *aij; 3601 3602 PetscFunctionBegin; 3603 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3604 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3605 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3606 3607 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3608 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3609 if (colflag && ncol == mat->cmap->N) { 3610 allcolumns = PETSC_TRUE; 3611 } else { 3612 allcolumns = PETSC_FALSE; 3613 } 3614 if (call == MAT_REUSE_MATRIX) { 3615 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3616 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3617 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3618 } else { 3619 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3620 } 3621 3622 /* 3623 m - number of local rows 3624 n - number of columns (same on all processors) 3625 rstart - first row in new global matrix generated 3626 */ 3627 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3628 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3629 if (call == MAT_INITIAL_MATRIX) { 3630 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3631 ii = aij->i; 3632 jj = aij->j; 3633 3634 /* 3635 Determine the number of non-zeros in the diagonal and off-diagonal 3636 portions of the matrix in order to do correct preallocation 3637 */ 3638 3639 /* first get start and end of "diagonal" columns */ 3640 if (csize == PETSC_DECIDE) { 3641 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3642 if (mglobal == n) { /* square matrix */ 3643 nlocal = m; 3644 } else { 3645 nlocal = n/size + ((n % size) > rank); 3646 } 3647 } else { 3648 nlocal = csize; 3649 } 3650 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3651 rstart = rend - nlocal; 3652 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3653 3654 /* next, compute all the lengths */ 3655 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3656 olens = dlens + m; 3657 for (i=0; i<m; i++) { 3658 jend = ii[i+1] - ii[i]; 3659 olen = 0; 3660 dlen = 0; 3661 for (j=0; j<jend; j++) { 3662 if (*jj < rstart || *jj >= rend) olen++; 3663 else dlen++; 3664 jj++; 3665 } 3666 olens[i] = olen; 3667 dlens[i] = dlen; 3668 } 3669 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3670 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3671 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3672 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3673 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3674 ierr = PetscFree(dlens);CHKERRQ(ierr); 3675 } else { 3676 PetscInt ml,nl; 3677 3678 M = *newmat; 3679 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3680 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3681 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3682 /* 3683 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
   rather than the slower MatSetValues().
  */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
3855 For matrices that will be factored, you must leave room for (and set) 3856 the diagonal entry even if it is zero. 3857 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3858 submatrix (same value is used for all local rows). 3859 - o_nnz - array containing the number of nonzeros in the various rows of the 3860 OFF-DIAGONAL portion of the local submatrix (possibly different for 3861 each row) or NULL, if o_nz is used to specify the nonzero 3862 structure. The size of this array is equal to the number 3863 of local rows, i.e 'm'. 3864 3865 If the *_nnz parameter is given then the *_nz parameter is ignored 3866 3867 The AIJ format (also called the Yale sparse matrix format or 3868 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3869 storage. The stored row and column indices begin with zero. 3870 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3871 3872 The parallel matrix is partitioned such that the first m0 rows belong to 3873 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3874 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3875 3876 The DIAGONAL portion of the local submatrix of a processor can be defined 3877 as the submatrix which is obtained by extraction the part corresponding to 3878 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3879 first row that belongs to the processor, r2 is the last row belonging to 3880 the this processor, and c1-c2 is range of indices of the local part of a 3881 vector suitable for applying the matrix to. This is an mxn matrix. In the 3882 common case of a square matrix, the row and column ranges are the same and 3883 the DIAGONAL part is also square. The remaining portion of the local 3884 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3885 3886 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3887 3888 You can call MatGetInfo() to get information on how effective the preallocation was; 3889 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3890 You can also run with the option -info and look for messages with the string 3891 malloc in them to see if additional memory allocation was needed. 3892 3893 Example usage: 3894 3895 Consider the following 8x8 matrix with 34 non-zero values, that is 3896 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3897 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3898 as follows: 3899 3900 .vb 3901 1 2 0 | 0 3 0 | 0 4 3902 Proc0 0 5 6 | 7 0 0 | 8 0 3903 9 0 10 | 11 0 0 | 12 0 3904 ------------------------------------- 3905 13 0 14 | 15 16 17 | 0 0 3906 Proc1 0 18 0 | 19 20 21 | 0 0 3907 0 0 0 | 22 23 0 | 24 0 3908 ------------------------------------- 3909 Proc2 25 26 27 | 0 0 28 | 29 0 3910 30 0 0 | 31 32 33 | 0 34 3911 .ve 3912 3913 This can be represented as a collection of submatrices as: 3914 3915 .vb 3916 A B C 3917 D E F 3918 G H I 3919 .ve 3920 3921 Where the submatrices A,B,C are owned by proc0, D,E,F are 3922 owned by proc1, G,H,I are owned by proc2. 3923 3924 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3925 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3926 The 'M','N' parameters are 8,8, and have the same values on all procs. 3927 3928 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3929 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3930 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3931 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3932 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3933 matrix, ans [DF] as another SeqAIJ matrix. 3934 3935 When d_nz, o_nz parameters are specified, d_nz storage elements are 3936 allocated for every row of the local diagonal submatrix, and o_nz 3937 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3938 One way to choose d_nz and o_nz is to use the max nonzerors per local 3939 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3940 In this case, the values of d_nz,o_nz are: 3941 .vb 3942 proc0 : dnz = 2, o_nz = 2 3943 proc1 : dnz = 3, o_nz = 2 3944 proc2 : dnz = 1, o_nz = 4 3945 .ve 3946 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3947 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3948 for proc3. i.e we are using 12+15+10=37 storage locations to store 3949 34 values. 3950 3951 When d_nnz, o_nnz parameters are specified, the storage is specified 3952 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3953 In the above case the values for d_nnz,o_nnz are: 3954 .vb 3955 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3956 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3957 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3958 .ve 3959 Here the space allocated is sum of all the above values i.e 34, and 3960 hence pre-allocation is perfect. 3961 3962 Level: intermediate 3963 3964 .keywords: matrix, aij, compressed row, sparse, parallel 3965 3966 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3967 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3968 @*/ 3969 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3970 { 3971 PetscErrorCode ierr; 3972 3973 PetscFunctionBegin; 3974 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3975 PetscValidType(B,1); 3976 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3977 PetscFunctionReturn(0); 3978 } 3979 3980 #undef __FUNCT__ 3981 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3982 /*@ 3983 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3984 CSR format the local rows. 3985 3986 Collective on MPI_Comm 3987 3988 Input Parameters: 3989 + comm - MPI communicator 3990 . m - number of local rows (Cannot be PETSC_DECIDE) 3991 . n - This value should be the same as the local size used in creating the 3992 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3993 calculated if N is given) For square matrices n is almost always m. 3994 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3995 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3996 . i - row indices 3997 . j - column indices 3998 - a - matrix values 3999 4000 Output Parameter: 4001 . mat - the matrix 4002 4003 Level: intermediate 4004 4005 Notes: 4006 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4007 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4008 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
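   For illustration only, a minimal usage sketch (nlocal, N and the CSR arrays ia, ja, va
   are placeholders; each process passes only the rows it owns, with ia[0] = 0):

.vb
   Mat A;
   /* ia, ja, va describe the nlocal rows owned by this process */
   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,nlocal,PETSC_DECIDE,PETSC_DETERMINE,N,ia,ja,va,&A);
   /* ... use A ..., then */
   MatDestroy(&A);
.ve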
4009 4010 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4011 4012 The format which is used for the sparse matrix input, is equivalent to a 4013 row-major ordering.. i.e for the following matrix, the input data expected is 4014 as shown: 4015 4016 1 0 0 4017 2 0 3 P0 4018 ------- 4019 4 5 6 P1 4020 4021 Process0 [P0]: rows_owned=[0,1] 4022 i = {0,1,3} [size = nrow+1 = 2+1] 4023 j = {0,0,2} [size = nz = 6] 4024 v = {1,2,3} [size = nz = 6] 4025 4026 Process1 [P1]: rows_owned=[2] 4027 i = {0,3} [size = nrow+1 = 1+1] 4028 j = {0,1,2} [size = nz = 6] 4029 v = {4,5,6} [size = nz = 6] 4030 4031 .keywords: matrix, aij, compressed row, sparse, parallel 4032 4033 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4034 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4035 @*/ 4036 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4037 { 4038 PetscErrorCode ierr; 4039 4040 PetscFunctionBegin; 4041 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4042 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4043 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4044 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4045 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4046 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4047 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4048 PetscFunctionReturn(0); 4049 } 4050 4051 #undef __FUNCT__ 4052 #define __FUNCT__ "MatCreateAIJ" 4053 /*@C 4054 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4055 (the default parallel PETSc format). For good matrix assembly performance 4056 the user should preallocate the matrix storage by setting the parameters 4057 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4058 performance can be increased by more than a factor of 50. 4059 4060 Collective on MPI_Comm 4061 4062 Input Parameters: 4063 + comm - MPI communicator 4064 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4065 This value should be the same as the local size used in creating the 4066 y vector for the matrix-vector product y = Ax. 4067 . n - This value should be the same as the local size used in creating the 4068 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4069 calculated if N is given) For square matrices n is almost always m. 4070 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4071 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4072 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4073 (same value is used for all local rows) 4074 . d_nnz - array containing the number of nonzeros in the various rows of the 4075 DIAGONAL portion of the local submatrix (possibly different for each row) 4076 or NULL, if d_nz is used to specify the nonzero structure. 4077 The size of this array is equal to the number of local rows, i.e 'm'. 4078 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4079 submatrix (same value is used for all local rows). 
4080 - o_nnz - array containing the number of nonzeros in the various rows of the 4081 OFF-DIAGONAL portion of the local submatrix (possibly different for 4082 each row) or NULL, if o_nz is used to specify the nonzero 4083 structure. The size of this array is equal to the number 4084 of local rows, i.e 'm'. 4085 4086 Output Parameter: 4087 . A - the matrix 4088 4089 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4090 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4091 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4092 4093 Notes: 4094 If the *_nnz parameter is given then the *_nz parameter is ignored 4095 4096 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4097 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4098 storage requirements for this matrix. 4099 4100 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4101 processor than it must be used on all processors that share the object for 4102 that argument. 4103 4104 The user MUST specify either the local or global matrix dimensions 4105 (possibly both). 4106 4107 The parallel matrix is partitioned across processors such that the 4108 first m0 rows belong to process 0, the next m1 rows belong to 4109 process 1, the next m2 rows belong to process 2 etc.. where 4110 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4111 values corresponding to [m x N] submatrix. 4112 4113 The columns are logically partitioned with the n0 columns belonging 4114 to 0th partition, the next n1 columns belonging to the next 4115 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4116 4117 The DIAGONAL portion of the local submatrix on any given processor 4118 is the submatrix corresponding to the rows and columns m,n 4119 corresponding to the given processor. i.e diagonal matrix on 4120 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4121 etc. The remaining portion of the local submatrix [m x (N-n)] 4122 constitute the OFF-DIAGONAL portion. The example below better 4123 illustrates this concept. 4124 4125 For a square global matrix we define each processor's diagonal portion 4126 to be its local rows and the corresponding columns (a square submatrix); 4127 each processor's off-diagonal portion encompasses the remainder of the 4128 local matrix (a rectangular submatrix). 4129 4130 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4131 4132 When calling this routine with a single process communicator, a matrix of 4133 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4134 type of communicator, use the construction mechanism: 4135 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4136 4137 By default, this format uses inodes (identical nodes) when possible. 4138 We search for consecutive rows with the same nonzero structure, thereby 4139 reusing matrix information to achieve increased efficiency. 4140 4141 Options Database Keys: 4142 + -mat_no_inode - Do not use inodes 4143 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4144 - -mat_aij_oneindex - Internally use indexing starting at 1 4145 rather than 0. Note that when calling MatSetValues(), 4146 the user still MUST index entries starting at 0! 4147 4148 4149 Example usage: 4150 4151 Consider the following 8x8 matrix with 34 non-zero values, that is 4152 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4153 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4154 as follows: 4155 4156 .vb 4157 1 2 0 | 0 3 0 | 0 4 4158 Proc0 0 5 6 | 7 0 0 | 8 0 4159 9 0 10 | 11 0 0 | 12 0 4160 ------------------------------------- 4161 13 0 14 | 15 16 17 | 0 0 4162 Proc1 0 18 0 | 19 20 21 | 0 0 4163 0 0 0 | 22 23 0 | 24 0 4164 ------------------------------------- 4165 Proc2 25 26 27 | 0 0 28 | 29 0 4166 30 0 0 | 31 32 33 | 0 34 4167 .ve 4168 4169 This can be represented as a collection of submatrices as: 4170 4171 .vb 4172 A B C 4173 D E F 4174 G H I 4175 .ve 4176 4177 Where the submatrices A,B,C are owned by proc0, D,E,F are 4178 owned by proc1, G,H,I are owned by proc2. 4179 4180 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4181 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4182 The 'M','N' parameters are 8,8, and have the same values on all procs. 4183 4184 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4185 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4186 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4187 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4188 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4189 matrix, ans [DF] as another SeqAIJ matrix. 4190 4191 When d_nz, o_nz parameters are specified, d_nz storage elements are 4192 allocated for every row of the local diagonal submatrix, and o_nz 4193 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4194 One way to choose d_nz and o_nz is to use the max nonzerors per local 4195 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4196 In this case, the values of d_nz,o_nz are: 4197 .vb 4198 proc0 : dnz = 2, o_nz = 2 4199 proc1 : dnz = 3, o_nz = 2 4200 proc2 : dnz = 1, o_nz = 4 4201 .ve 4202 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4203 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4204 for proc3. i.e we are using 12+15+10=37 storage locations to store 4205 34 values. 4206 4207 When d_nnz, o_nnz parameters are specified, the storage is specified 4208 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4209 In the above case the values for d_nnz,o_nnz are: 4210 .vb 4211 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4212 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4213 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4214 .ve 4215 Here the space allocated is sum of all the above values i.e 34, and 4216 hence pre-allocation is perfect. 
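   For instance, using the per-row counts worked out above, proc1 of this example could
   create its share of the matrix with:

.vb
   Mat      A;
   PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
   /* then insert the 34 values with MatSetValues() and assemble the matrix */
.ve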
4217 4218 Level: intermediate 4219 4220 .keywords: matrix, aij, compressed row, sparse, parallel 4221 4222 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4223 MPIAIJ, MatCreateMPIAIJWithArrays() 4224 @*/ 4225 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4226 { 4227 PetscErrorCode ierr; 4228 PetscMPIInt size; 4229 4230 PetscFunctionBegin; 4231 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4232 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4233 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4234 if (size > 1) { 4235 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4236 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4237 } else { 4238 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4239 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4240 } 4241 PetscFunctionReturn(0); 4242 } 4243 4244 #undef __FUNCT__ 4245 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4246 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4247 { 4248 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4249 4250 PetscFunctionBegin; 4251 *Ad = a->A; 4252 *Ao = a->B; 4253 *colmap = a->garray; 4254 PetscFunctionReturn(0); 4255 } 4256 4257 #undef __FUNCT__ 4258 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4259 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4260 { 4261 PetscErrorCode ierr; 4262 PetscInt i; 4263 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4264 4265 PetscFunctionBegin; 4266 if (coloring->ctype == IS_COLORING_GLOBAL) { 4267 ISColoringValue *allcolors,*colors; 4268 ISColoring ocoloring; 4269 4270 /* set coloring for diagonal portion */ 4271 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4272 4273 /* set coloring for off-diagonal portion */ 4274 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4275 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4276 for (i=0; i<a->B->cmap->n; i++) { 4277 colors[i] = allcolors[a->garray[i]]; 4278 } 4279 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4280 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4281 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4282 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4283 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4284 ISColoringValue *colors; 4285 PetscInt *larray; 4286 ISColoring ocoloring; 4287 4288 /* set coloring for diagonal portion */ 4289 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4290 for (i=0; i<a->A->cmap->n; i++) { 4291 larray[i] = i + A->cmap->rstart; 4292 } 4293 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4294 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4295 for (i=0; i<a->A->cmap->n; i++) { 4296 colors[i] = coloring->colors[larray[i]]; 4297 } 4298 ierr = PetscFree(larray);CHKERRQ(ierr); 4299 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4300 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4301 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4302 4303 /* set coloring for off-diagonal portion */ 4304 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4305 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4306 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4307 for (i=0; i<a->B->cmap->n; i++) { 4308 colors[i] = coloring->colors[larray[i]]; 4309 } 4310 ierr = PetscFree(larray);CHKERRQ(ierr); 4311 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4312 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4313 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4314 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4315 PetscFunctionReturn(0); 4316 } 4317 4318 #undef __FUNCT__ 4319 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4320 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4321 { 4322 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4323 PetscErrorCode ierr; 4324 4325 PetscFunctionBegin; 4326 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4327 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4328 PetscFunctionReturn(0); 4329 } 4330 4331 #undef __FUNCT__ 4332 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4333 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4334 { 4335 PetscErrorCode ierr; 4336 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4337 PetscInt *indx; 4338 4339 PetscFunctionBegin; 4340 /* This routine will ONLY return MPIAIJ type matrix */ 4341 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4342 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4343 if (n == PETSC_DECIDE) { 4344 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4345 } 4346 /* Check sum(n) = N */ 4347 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4348 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4349 4350 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4351 rstart -= m; 4352 4353 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4354 for (i=0; i<m; i++) { 4355 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4356 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4357 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4358 } 4359 4360 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4361 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4362 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4363 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4364 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4365 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4366 PetscFunctionReturn(0); 4367 } 4368 4369 #undef __FUNCT__ 4370 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4371 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4372 { 4373 PetscErrorCode ierr; 4374 PetscInt m,N,i,rstart,nnz,Ii; 4375 PetscInt *indx; 4376 PetscScalar *values; 4377 4378 PetscFunctionBegin; 4379 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4380 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4381 for (i=0; i<m; i++) { 4382 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4383 Ii = i + rstart; 4384 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4385 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4386 } 4387 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4388 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4389 PetscFunctionReturn(0); 4390 } 4391 4392 #undef __FUNCT__ 4393 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4394 /*@ 4395 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4396 matrices from each processor 4397 4398 Collective on MPI_Comm 4399 4400 Input Parameters: 4401 + comm - the communicators the parallel matrix will live on 4402 . inmat - the input sequential matrices 4403 . n - number of local columns (or PETSC_DECIDE) 4404 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4405 4406 Output Parameter: 4407 . outmat - the parallel matrix generated 4408 4409 Level: advanced 4410 4411 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4412 4413 @*/ 4414 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4415 { 4416 PetscErrorCode ierr; 4417 PetscMPIInt size; 4418 4419 PetscFunctionBegin; 4420 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4421 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4422 if (size == 1) { 4423 if (scall == MAT_INITIAL_MATRIX) { 4424 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4425 } else { 4426 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4427 } 4428 } else { 4429 if (scall == MAT_INITIAL_MATRIX) { 4430 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4431 } 4432 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4433 } 4434 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4435 PetscFunctionReturn(0); 4436 } 4437 4438 #undef __FUNCT__ 4439 #define __FUNCT__ "MatFileSplit" 4440 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4441 { 4442 PetscErrorCode ierr; 4443 PetscMPIInt rank; 4444 PetscInt m,N,i,rstart,nnz; 4445 size_t len; 4446 const PetscInt *indx; 4447 PetscViewer out; 4448 char *name; 4449 Mat B; 4450 const PetscScalar *values; 4451 4452 PetscFunctionBegin; 4453 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4454 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4455 /* Should this be the type of the diagonal block of A? 
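      (At present it is simply hard-wired to MATSEQAIJ below, regardless of A's subtype.)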
*/ 4456 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4457 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4458 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4459 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4460 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4461 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4462 for (i=0; i<m; i++) { 4463 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4464 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4465 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4466 } 4467 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4468 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4469 4470 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4471 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4472 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4473 sprintf(name,"%s.%d",outfile,rank); 4474 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4475 ierr = PetscFree(name);CHKERRQ(ierr); 4476 ierr = MatView(B,out);CHKERRQ(ierr); 4477 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4478 ierr = MatDestroy(&B);CHKERRQ(ierr); 4479 PetscFunctionReturn(0); 4480 } 4481 4482 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4483 #undef __FUNCT__ 4484 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4485 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4486 { 4487 PetscErrorCode ierr; 4488 Mat_Merge_SeqsToMPI *merge; 4489 PetscContainer container; 4490 4491 PetscFunctionBegin; 4492 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4493 if (container) { 4494 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4505 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4507 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4508 ierr = PetscFree(merge);CHKERRQ(ierr); 4509 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4510 } 4511 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4512 PetscFunctionReturn(0); 4513 } 4514 4515 #include <../src/mat/utils/freespace.h> 4516 #include <petscbt.h> 4517 4518 #undef __FUNCT__ 4519 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4520 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4521 { 4522 PetscErrorCode ierr; 4523 MPI_Comm comm; 4524 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4525 PetscMPIInt size,rank,taga,*len_s; 4526 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4527 PetscInt proc,m; 4528 PetscInt **buf_ri,**buf_rj; 4529 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4530 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4531 MPI_Request *s_waits,*r_waits; 4532 MPI_Status *status; 4533 MatScalar *aa=a->a; 4534 MatScalar **abuf_r,*ba_i; 4535 Mat_Merge_SeqsToMPI *merge; 4536 PetscContainer container; 4537 4538 PetscFunctionBegin; 4539 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4540 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4541 4542 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4543 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4544 4545 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4546 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4547 4548 bi = merge->bi; 4549 bj = merge->bj; 4550 buf_ri = merge->buf_ri; 4551 buf_rj = merge->buf_rj; 4552 4553 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4554 owners = merge->rowmap->range; 4555 len_s = merge->len_s; 4556 4557 /* send and recv matrix values */ 4558 /*-----------------------------*/ 4559 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4560 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4561 4562 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4563 for (proc=0,k=0; proc<size; proc++) { 4564 if (!len_s[proc]) continue; 4565 i = owners[proc]; 4566 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4567 k++; 4568 } 4569 4570 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4571 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4572 ierr = PetscFree(status);CHKERRQ(ierr); 4573 4574 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4575 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4576 4577 /* insert mat values of mpimat */ 4578 /*----------------------------*/ 4579 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4580 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4581 4582 for (k=0; k<merge->nrecv; k++) { 4583 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4584 nrows = *(buf_ri_k[k]); 4585 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4586 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4587 } 4588 4589 /* set values of ba */ 4590 m = merge->rowmap->n; 4591 for (i=0; i<m; i++) { 4592 arow = owners[rank] + i; 4593 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4594 bnzi = bi[i+1] - bi[i]; 4595 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4596 4597 /* add local non-zero vals of this proc's seqmat into ba */ 4598 anzi = ai[arow+1] - ai[arow]; 4599 aj = a->j + ai[arow]; 4600 aa = a->a + ai[arow]; 4601 nextaj = 0; 4602 for (j=0; nextaj<anzi; j++) { 4603 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4604 ba_i[j] += aa[nextaj++]; 4605 } 4606 } 4607 4608 /* add received vals into ba */ 4609 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4610 /* i-th row */ 4611 if (i == *nextrow[k]) { 4612 anzi = *(nextai[k]+1) - *nextai[k]; 4613 aj = buf_rj[k] + *(nextai[k]); 4614 aa = abuf_r[k] + *(nextai[k]); 4615 nextaj = 0; 4616 for (j=0; nextaj<anzi; j++) { 4617 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4618 ba_i[j] += aa[nextaj++]; 4619 } 4620 } 4621 nextrow[k]++; nextai[k]++; 4622 } 4623 } 4624 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4625 } 4626 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4627 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4628 4629 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4630 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4631 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4632 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4633 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4634 PetscFunctionReturn(0); 4635 } 4636 4637 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4638 4639 #undef __FUNCT__ 4640 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4641 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4642 { 4643 PetscErrorCode ierr; 4644 Mat B_mpi; 4645 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4646 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4647 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4648 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4649 PetscInt len,proc,*dnz,*onz,bs,cbs; 4650 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4651 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4652 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4653 MPI_Status *status; 4654 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4655 PetscBT lnkbt; 4656 Mat_Merge_SeqsToMPI *merge; 4657 PetscContainer container; 4658 4659 PetscFunctionBegin; 4660 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4661 4662 /* make sure it is a PETSc comm */ 4663 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4664 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4665 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4666 4667 ierr = PetscNew(&merge);CHKERRQ(ierr); 4668 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4669 4670 /* determine row ownership */ 4671 /*---------------------------------------------------------*/ 4672 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4673 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4674 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4675 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4676 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4677 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4678 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4679 4680 m = merge->rowmap->n; 4681 owners = merge->rowmap->range; 4682 4683 /* determine the number of messages to send, their lengths */ 4684 /*---------------------------------------------------------*/ 4685 len_s = merge->len_s; 4686 4687 len = 0; /* length of buf_si[] */ 4688 merge->nsend = 0; 4689 for (proc=0; proc<size; proc++) { 4690 len_si[proc] = 0; 4691 if (proc == rank) { 4692 len_s[proc] = 0; 4693 } else { 4694 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4695 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4696 } 4697 if (len_s[proc]) { 4698 merge->nsend++; 4699 nrows = 0; 4700 for (i=owners[proc]; i<owners[proc+1]; i++) { 4701 if (ai[i+1] > ai[i]) nrows++; 4702 } 4703 len_si[proc] = 2*(nrows+1); 4704 len += len_si[proc]; 4705 } 4706 } 4707 4708 /* determine the number and length of messages to receive for ij-structure */ 4709 /*-------------------------------------------------------------------------*/ 4710 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4711 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4712 4713 /* post the Irecv of j-structure */ 4714 /*-------------------------------*/ 4715 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4716 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4717 4718 /* post the Isend of j-structure */ 4719 /*--------------------------------*/ 4720 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4721 4722 for (proc=0, k=0; proc<size; proc++) { 4723 if (!len_s[proc]) continue; 4724 i = owners[proc]; 4725 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4726 k++; 4727 } 4728 4729 /* receives and sends of j-structure are complete */ 4730 /*------------------------------------------------*/ 4731 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4732 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4733 4734 /* send and recv i-structure */ 4735 /*---------------------------*/ 4736 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4737 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4738 4739 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4740 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4741 for (proc=0,k=0; proc<size; proc++) { 4742 if (!len_s[proc]) continue; 4743 /* form outgoing message for i-structure: 4744 buf_si[0]: nrows to be sent 4745 [1:nrows]: row index (global) 4746 [nrows+1:2*nrows+1]: i-structure index 4747 */ 4748 /*-------------------------------------------*/ 4749 nrows = len_si[proc]/2 - 1; 4750 buf_si_i = buf_si + nrows+1; 4751 buf_si[0] = nrows; 4752 buf_si_i[0] = 0; 4753 nrows = 0; 4754 for (i=owners[proc]; i<owners[proc+1]; i++) { 4755 anzi = ai[i+1] - ai[i]; 4756 if (anzi) { 4757 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4758 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4759 nrows++; 4760 } 4761 } 4762 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4763 k++; 4764 buf_si += len_si[proc]; 4765 } 4766 4767 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4768 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4769 4770 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4771 for (i=0; i<merge->nrecv; i++) { 4772 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4773 } 4774 4775 ierr = PetscFree(len_si);CHKERRQ(ierr); 4776 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4777 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4778 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4779 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4780 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4781 ierr = PetscFree(status);CHKERRQ(ierr); 4782 4783 /* compute a local seq matrix in each processor */ 4784 /*----------------------------------------------*/ 4785 /* allocate bi array and free space for accumulating nonzero column info */ 4786 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4787 bi[0] = 0; 4788 4789 /* create and initialize a linked list */ 4790 nlnk = N+1; 4791 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4792 4793 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4794 len = ai[owners[rank+1]] - ai[owners[rank]]; 4795 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4796 4797 current_space = free_space; 4798 4799 /* determine symbolic info for each local row */ 4800 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4801
4802 for (k=0; k<merge->nrecv; k++) {
4803 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4804 nrows = *buf_ri_k[k];
4805 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4806 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4807 }
4808
4809 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4810 len = 0;
4811 for (i=0; i<m; i++) {
4812 bnzi = 0;
4813 /* add local non-zero cols of this proc's seqmat into lnk */
4814 arow = owners[rank] + i;
4815 anzi = ai[arow+1] - ai[arow];
4816 aj = a->j + ai[arow];
4817 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4818 bnzi += nlnk;
4819 /* add received col data into lnk */
4820 for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4821 if (i == *nextrow[k]) { /* i-th row */
4822 anzi = *(nextai[k]+1) - *nextai[k];
4823 aj = buf_rj[k] + *nextai[k];
4824 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4825 bnzi += nlnk;
4826 nextrow[k]++; nextai[k]++;
4827 }
4828 }
4829 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4830
4831 /* if free space is not available, make more free space */
4832 if (current_space->local_remaining<bnzi) {
4833 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4834 nspacedouble++;
4835 }
4836 /* copy data into free space, then initialize lnk */
4837 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4838 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4839
4840 current_space->array += bnzi;
4841 current_space->local_used += bnzi;
4842 current_space->local_remaining -= bnzi;
4843
4844 bi[i+1] = bi[i] + bnzi;
4845 }
4846
4847 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4848
4849 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4850 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4851 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4852
4853 /* create symbolic parallel matrix B_mpi */
4854 /*---------------------------------------*/
4855 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4856 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4857 if (n==PETSC_DECIDE) {
4858 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4859 } else {
4860 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4861 }
4862 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4863 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4864 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4865 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4866 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4867
4868 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4869 B_mpi->assembled = PETSC_FALSE;
4870 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4871 merge->bi = bi;
4872 merge->bj = bj;
4873 merge->buf_ri = buf_ri;
4874 merge->buf_rj = buf_rj;
4875 merge->coi = NULL;
4876 merge->coj = NULL;
4877 merge->owners_co = NULL;
4878
4879 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4880
4881 /* attach the supporting struct to B_mpi for reuse */
4882 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4883 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4884 ierr =
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4885 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4886 *mpimat = B_mpi;
4887
4888 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4889 PetscFunctionReturn(0);
4890 }
4891
4892 #undef __FUNCT__
4893 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4894 /*@C
4895 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4896 matrices from each processor
4897
4898 Collective on MPI_Comm
4899
4900 Input Parameters:
4901 + comm - the communicator the parallel matrix will live on
4902 . seqmat - the input sequential matrix on each process
4903 . m - number of local rows (or PETSC_DECIDE)
4904 . n - number of local columns (or PETSC_DECIDE)
4905 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4906
4907 Output Parameter:
4908 . mpimat - the parallel matrix generated
4909
4910 Level: advanced
4911
4912 Notes:
4913 The dimensions of the sequential matrix in each processor MUST be the same.
4914 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4915 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4916 @*/
4917 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4918 {
4919 PetscErrorCode ierr;
4920 PetscMPIInt size;
4921
4922 PetscFunctionBegin;
4923 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4924 if (size == 1) {
4925 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4926 if (scall == MAT_INITIAL_MATRIX) {
4927 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4928 } else {
4929 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4930 }
4931 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4932 PetscFunctionReturn(0);
4933 }
4934 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935 if (scall == MAT_INITIAL_MATRIX) {
4936 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4937 }
4938 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4939 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4940 PetscFunctionReturn(0);
4941 }
4942
4943 #undef __FUNCT__
4944 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4945 /*@
4946 MatMPIAIJGetLocalMat - Creates a SeqAIJ from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4947 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4948 with MatGetSize()
4949
4950 Not Collective
4951
4952 Input Parameters:
4953 + A - the matrix
4954 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4955
4956 Output Parameter:
4957 . A_loc - the local sequential matrix generated
4958
4959 Level: developer
4960
4961 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4962
4963 @*/
4964 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4965 {
4966 PetscErrorCode ierr;
4967 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4968 Mat_SeqAIJ *mat,*a,*b;
4969 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4970 MatScalar *aa,*ba,*cam;
4971 PetscScalar *ca;
4972 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4973 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4974 PetscBool match;
4975
4976 PetscFunctionBegin;
4977 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4978 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4979 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4980 a = (Mat_SeqAIJ*)(mpimat->A)->data;
4981 b = (Mat_SeqAIJ*)(mpimat->B)->data;
4982 ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4983 aa = a->a; ba = b->a;
4984 if (scall == MAT_INITIAL_MATRIX) {
4985 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4986 ci[0] = 0;
4987 for (i=0; i<am; i++) {
4988 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4989 }
4990 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4991 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4992 k = 0;
4993 for (i=0; i<am; i++) {
4994 ncols_o = bi[i+1] - bi[i];
4995 ncols_d = ai[i+1] - ai[i];
4996 /* off-diagonal portion of A */
4997 for (jo=0; jo<ncols_o; jo++) {
4998 col = cmap[*bj];
4999 if (col >= cstart) break;
5000 cj[k] = col; bj++;
5001 ca[k++] = *ba++;
5002 }
5003 /* diagonal portion of A */
5004 for (j=0; j<ncols_d; j++) {
5005 cj[k] = cstart + *aj++;
5006 ca[k++] = *aa++;
5007 }
5008 /* off-diagonal portion of A */
5009 for (j=jo; j<ncols_o; j++) {
5010 cj[k] = cmap[*bj++];
5011 ca[k++] = *ba++;
5012 }
5013 }
5014 /* put together the new matrix */
5015 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5016 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5017 /* Since these are PETSc arrays, change flags to free them as necessary. */
5018 mat = (Mat_SeqAIJ*)(*A_loc)->data;
5019 mat->free_a = PETSC_TRUE;
5020 mat->free_ij = PETSC_TRUE;
5021 mat->nonew = 0;
5022 } else if (scall == MAT_REUSE_MATRIX) {
5023 mat=(Mat_SeqAIJ*)(*A_loc)->data;
5024 ci = mat->i; cj = mat->j; cam = mat->a;
5025 for (i=0; i<am; i++) {
5026 /* off-diagonal portion of A */
5027 ncols_o = bi[i+1] - bi[i];
5028 for (jo=0; jo<ncols_o; jo++) {
5029 col = cmap[*bj];
5030 if (col >= cstart) break;
5031 *cam++ = *ba++; bj++;
5032 }
5033 /* diagonal portion of A */
5034 ncols_d = ai[i+1] - ai[i];
5035 for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5036 /* off-diagonal portion of A */
5037 for (j=jo; j<ncols_o; j++) {
5038 *cam++ = *ba++; bj++;
5039 }
5040 }
5041 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5042 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5043 PetscFunctionReturn(0);
5044 }
5045
5046 #undef __FUNCT__
5047 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5048 /*@C
5049 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5050
5051 Not Collective
5052
5053 Input Parameters:
5054 + A - the matrix
5055 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5056 - row, col - index sets of rows and columns to extract (or NULL)
5057
5058 Output Parameter:
5059 . A_loc - the local sequential matrix generated
5060
5061 Level: developer
5062
5063 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5064
5065 @*/
5066 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5067 {
5068 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5069 PetscErrorCode ierr;
5070 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5071 IS isrowa,iscola;
5072 Mat *aloc;
5073 PetscBool match;
5074
5075 PetscFunctionBegin;
5076 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5077 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5078 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5079 if (!row) {
5080 start = A->rmap->rstart; end = A->rmap->rend;
5081 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5082 } else {
5083 isrowa = *row;
5084 }
5085 if (!col) {
5086 start = A->cmap->rstart;
5087 cmap = a->garray;
5088 nzA = a->A->cmap->n;
5089 nzB = a->B->cmap->n;
5090 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5091 ncols = 0;
5092 for (i=0; i<nzB; i++) {
5093 if (cmap[i] < start) idx[ncols++] = cmap[i];
5094 else break;
5095 }
5096 imark = i;
5097 for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5098 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5099 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5100 } else {
5101 iscola = *col;
5102 }
5103 if (scall != MAT_INITIAL_MATRIX) {
5104 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5105 aloc[0] = *A_loc;
5106 }
5107 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5108 *A_loc = aloc[0];
5109 ierr = PetscFree(aloc);CHKERRQ(ierr);
5110 if (!row) {
5111 ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5112 }
5113 if (!col) {
5114 ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5115 }
5116 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5117 PetscFunctionReturn(0);
5118 }
5119
5120 #undef __FUNCT__
5121 #define __FUNCT__ "MatGetBrowsOfAcols"
5122 /*@C
5123 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5124
5125 Collective on Mat
5126
5127 Input Parameters:
5128 + A,B - the matrices in mpiaij format
5129 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5130 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5131 5132 Output Parameter: 5133 + rowb, colb - index sets of rows and columns of B to extract 5134 - B_seq - the sequential matrix generated 5135 5136 Level: developer 5137 5138 @*/ 5139 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5140 { 5141 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5142 PetscErrorCode ierr; 5143 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5144 IS isrowb,iscolb; 5145 Mat *bseq=NULL; 5146 5147 PetscFunctionBegin; 5148 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5149 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5150 } 5151 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5152 5153 if (scall == MAT_INITIAL_MATRIX) { 5154 start = A->cmap->rstart; 5155 cmap = a->garray; 5156 nzA = a->A->cmap->n; 5157 nzB = a->B->cmap->n; 5158 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5159 ncols = 0; 5160 for (i=0; i<nzB; i++) { /* row < local row index */ 5161 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5162 else break; 5163 } 5164 imark = i; 5165 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5166 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5167 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5168 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5169 } else { 5170 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5171 isrowb = *rowb; iscolb = *colb; 5172 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5173 bseq[0] = *B_seq; 5174 } 5175 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5176 *B_seq = bseq[0]; 5177 ierr = PetscFree(bseq);CHKERRQ(ierr); 5178 if (!rowb) { 5179 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5180 } else { 5181 *rowb = isrowb; 5182 } 5183 if (!colb) { 5184 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5185 } else { 5186 *colb = iscolb; 5187 } 5188 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5189 PetscFunctionReturn(0); 5190 } 5191 5192 #undef __FUNCT__ 5193 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5194 /* 5195 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5196 of the OFF-DIAGONAL portion of local A 5197 5198 Collective on Mat 5199 5200 Input Parameters: 5201 + A,B - the matrices in mpiaij format 5202 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5203 5204 Output Parameter: 5205 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5206 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5207 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5208 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5209 5210 Level: developer 5211 5212 */ 5213 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5214 { 5215 VecScatter_MPI_General *gen_to,*gen_from; 5216 PetscErrorCode ierr; 5217 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5218 Mat_SeqAIJ *b_oth; 5219 VecScatter ctx =a->Mvctx; 5220 MPI_Comm comm; 5221 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5222 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5223 PetscScalar *rvalues,*svalues; 5224 MatScalar *b_otha,*bufa,*bufA; 5225 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5226 MPI_Request *rwaits = NULL,*swaits = NULL; 5227 MPI_Status *sstatus,rstatus; 5228 PetscMPIInt jj; 5229 PetscInt *cols,sbs,rbs; 5230 PetscScalar *vals; 5231 5232 PetscFunctionBegin; 5233 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5234 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5235 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5236 } 5237 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5238 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5239 5240 gen_to = (VecScatter_MPI_General*)ctx->todata; 5241 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5242 rvalues = gen_from->values; /* holds the length of receiving row */ 5243 svalues = gen_to->values; /* holds the length of sending row */ 5244 nrecvs = gen_from->n; 5245 nsends = gen_to->n; 5246 5247 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5248 srow = gen_to->indices; /* local row index to be sent */ 5249 sstarts = gen_to->starts; 5250 sprocs = gen_to->procs; 5251 sstatus = gen_to->sstatus; 5252 sbs = gen_to->bs; 5253 rstarts = gen_from->starts; 5254 rprocs = gen_from->procs; 5255 rbs = gen_from->bs; 5256 5257 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5258 if (scall == MAT_INITIAL_MATRIX) { 5259 /* i-array */ 5260 /*---------*/ 5261 /* post receives */ 5262 for (i=0; i<nrecvs; i++) { 5263 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5264 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5265 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5266 } 5267 5268 /* pack the outgoing message */ 5269 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5270 5271 sstartsj[0] = 0; 5272 rstartsj[0] = 0; 5273 len = 0; /* total length of j or a array to be sent */ 5274 k = 0; 5275 for (i=0; i<nsends; i++) { 5276 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5277 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5278 for (j=0; j<nrows; j++) { 5279 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5280 for (l=0; l<sbs; l++) { 5281 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5282 5283 rowlen[j*sbs+l] = ncols; 5284 5285 len += ncols; 5286 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5287 } 5288 k++; 5289 } 5290 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5291 5292 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5293 } 5294 /* recvs and 
sends of i-array are completed */
5295 i = nrecvs;
5296 while (i--) {
5297 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5298 }
5299 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5300
5301 /* allocate buffers for sending j and a arrays */
5302 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5303 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5304
5305 /* create i-array of B_oth */
5306 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5307
5308 b_othi[0] = 0;
5309 len = 0; /* total length of j or a array to be received */
5310 k = 0;
5311 for (i=0; i<nrecvs; i++) {
5312 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5313 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5314 for (j=0; j<nrows; j++) {
5315 b_othi[k+1] = b_othi[k] + rowlen[j];
5316 len += rowlen[j]; k++;
5317 }
5318 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5319 }
5320
5321 /* allocate space for j and a arrays of B_oth */
5322 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5323 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5324
5325 /* j-array */
5326 /*---------*/
5327 /* post receives of j-array */
5328 for (i=0; i<nrecvs; i++) {
5329 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5330 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5331 }
5332
5333 /* pack the outgoing message j-array */
5334 k = 0;
5335 for (i=0; i<nsends; i++) {
5336 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5337 bufJ = bufj+sstartsj[i];
5338 for (j=0; j<nrows; j++) {
5339 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5340 for (ll=0; ll<sbs; ll++) {
5341 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5342 for (l=0; l<ncols; l++) {
5343 *bufJ++ = cols[l];
5344 }
5345 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5346 }
5347 }
5348 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5349 }
5350
5351 /* recvs and sends of j-array are completed */
5352 i = nrecvs;
5353 while (i--) {
5354 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5355 }
5356 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5357 } else if (scall == MAT_REUSE_MATRIX) {
5358 sstartsj = *startsj_s;
5359 rstartsj = *startsj_r;
5360 bufa = *bufa_ptr;
5361 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5362 b_otha = b_oth->a;
5363 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5364
5365 /* a-array */
5366 /*---------*/
5367 /* post receives of a-array */
5368 for (i=0; i<nrecvs; i++) {
5369 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5370 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5371 }
5372
5373 /* pack the outgoing message a-array */
5374 k = 0;
5375 for (i=0; i<nsends; i++) {
5376 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5377 bufA = bufa+sstartsj[i];
5378 for (j=0; j<nrows; j++) {
5379 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5380 for (ll=0; ll<sbs; ll++) {
5381 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5382 for (l=0; l<ncols; l++) {
5383 *bufA++ = vals[l];
5384 }
5385 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5386 }
5387 }
5388 ierr =
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5389 } 5390 /* recvs and sends of a-array are completed */ 5391 i = nrecvs; 5392 while (i--) { 5393 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5394 } 5395 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5396 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5397 5398 if (scall == MAT_INITIAL_MATRIX) { 5399 /* put together the new matrix */ 5400 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5401 5402 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5403 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5404 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5405 b_oth->free_a = PETSC_TRUE; 5406 b_oth->free_ij = PETSC_TRUE; 5407 b_oth->nonew = 0; 5408 5409 ierr = PetscFree(bufj);CHKERRQ(ierr); 5410 if (!startsj_s || !bufa_ptr) { 5411 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5412 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5413 } else { 5414 *startsj_s = sstartsj; 5415 *startsj_r = rstartsj; 5416 *bufa_ptr = bufa; 5417 } 5418 } 5419 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5420 PetscFunctionReturn(0); 5421 } 5422 5423 #undef __FUNCT__ 5424 #define __FUNCT__ "MatGetCommunicationStructs" 5425 /*@C 5426 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5427 5428 Not Collective 5429 5430 Input Parameters: 5431 . A - The matrix in mpiaij format 5432 5433 Output Parameter: 5434 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5435 . colmap - A map from global column index to local index into lvec 5436 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5437 5438 Level: developer 5439 5440 @*/ 5441 #if defined(PETSC_USE_CTABLE) 5442 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5443 #else 5444 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5445 #endif 5446 { 5447 Mat_MPIAIJ *a; 5448 5449 PetscFunctionBegin; 5450 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5451 PetscValidPointer(lvec, 2); 5452 PetscValidPointer(colmap, 3); 5453 PetscValidPointer(multScatter, 4); 5454 a = (Mat_MPIAIJ*) A->data; 5455 if (lvec) *lvec = a->lvec; 5456 if (colmap) *colmap = a->colmap; 5457 if (multScatter) *multScatter = a->Mvctx; 5458 PetscFunctionReturn(0); 5459 } 5460 5461 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5462 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5463 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5464 5465 #undef __FUNCT__ 5466 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5467 /* 5468 Computes (B'*A')' since computing B*A directly is untenable 5469 5470 n p p 5471 ( ) ( ) ( ) 5472 m ( A ) * n ( B ) = m ( C ) 5473 ( ) ( ) ( ) 5474 5475 */ 5476 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5477 { 5478 PetscErrorCode ierr; 5479 Mat At,Bt,Ct; 5480 5481 PetscFunctionBegin; 5482 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5483 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5484 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5485 ierr = MatDestroy(&At);CHKERRQ(ierr); 
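/* At and Bt are no longer needed; Ct = Bt*At = (A*B)^T and is transposed below into the caller-provided C */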
5486 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5487 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5488 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5489 PetscFunctionReturn(0); 5490 } 5491 5492 #undef __FUNCT__ 5493 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5494 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5495 { 5496 PetscErrorCode ierr; 5497 PetscInt m=A->rmap->n,n=B->cmap->n; 5498 Mat Cmat; 5499 5500 PetscFunctionBegin; 5501 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5502 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5503 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5504 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5505 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5506 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5507 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5508 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5509 5510 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5511 5512 *C = Cmat; 5513 PetscFunctionReturn(0); 5514 } 5515 5516 /* ----------------------------------------------------------------*/ 5517 #undef __FUNCT__ 5518 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5519 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5520 { 5521 PetscErrorCode ierr; 5522 5523 PetscFunctionBegin; 5524 if (scall == MAT_INITIAL_MATRIX) { 5525 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5526 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5527 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5528 } 5529 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5530 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5531 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5532 PetscFunctionReturn(0); 5533 } 5534 5535 #if defined(PETSC_HAVE_MUMPS) 5536 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5537 #endif 5538 #if defined(PETSC_HAVE_PASTIX) 5539 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5540 #endif 5541 #if defined(PETSC_HAVE_SUPERLU_DIST) 5542 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5543 #endif 5544 #if defined(PETSC_HAVE_CLIQUE) 5545 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5546 #endif 5547 5548 /*MC 5549 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5550 5551 Options Database Keys: 5552 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5553 5554 Level: beginner 5555 5556 .seealso: MatCreateAIJ() 5557 M*/ 5558 5559 #undef __FUNCT__ 5560 #define __FUNCT__ "MatCreate_MPIAIJ" 5561 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5562 { 5563 Mat_MPIAIJ *b; 5564 PetscErrorCode ierr; 5565 PetscMPIInt size; 5566 5567 PetscFunctionBegin; 5568 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5569 5570 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5571 B->data = (void*)b; 5572 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5573 B->assembled = PETSC_FALSE; 5574 B->insertmode = NOT_SET_VALUES; 5575 b->size = size; 5576 5577 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5578 5579 /* build cache for off array entries formed */ 5580 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5581 5582 b->donotstash = PETSC_FALSE; 5583 b->colmap = 0; 5584 b->garray = 0; 5585 b->roworiented = PETSC_TRUE; 5586 5587 /* stuff used for matrix vector multiply */ 5588 b->lvec = NULL; 5589 b->Mvctx = NULL; 5590 5591 /* stuff for MatGetRow() */ 5592 b->rowindices = 0; 5593 b->rowvalues = 0; 5594 b->getrowactive = PETSC_FALSE; 5595 5596 /* flexible pointer used in CUSP/CUSPARSE classes */ 5597 b->spptr = NULL; 5598 5599 #if defined(PETSC_HAVE_MUMPS) 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5601 #endif 5602 #if defined(PETSC_HAVE_PASTIX) 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5604 #endif 5605 #if defined(PETSC_HAVE_SUPERLU_DIST) 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5607 #endif 5608 #if defined(PETSC_HAVE_CLIQUE) 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5610 #endif 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5621 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5622 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5624 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5625 PetscFunctionReturn(0); 5626 } 5627 5628 #undef __FUNCT__ 5629 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5630 /*@ 5631 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5632 and "off-diagonal" part of the matrix in CSR format. 5633 5634 Collective on MPI_Comm 5635 5636 Input Parameters: 5637 + comm - MPI communicator 5638 . m - number of local rows (Cannot be PETSC_DECIDE) 5639 . n - This value should be the same as the local size used in creating the 5640 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5641 calculated if N is given) For square matrices n is almost always m. 5642 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5643 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5644 . i - row indices for "diagonal" portion of matrix 5645 . j - column indices 5646 . a - matrix values 5647 . oi - row indices for "off-diagonal" portion of matrix 5648 . oj - column indices 5649 - oa - matrix values 5650 5651 Output Parameter: 5652 . mat - the matrix 5653 5654 Level: advanced 5655 5656 Notes: 5657 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5658 must free the arrays once the matrix has been destroyed and not before. 5659 5660 The i and j indices are 0 based 5661 5662 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5663 5664 This sets local rows and cannot be used to set off-processor values. 5665 5666 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5667 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5668 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5669 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5670 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5671 communication if it is known that only local entries will be set. 
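
   Example usage (a minimal sketch, not taken from the PETSc examples; it assumes two MPI ranks, each owning
   two rows and two columns of a global 4x4 matrix, and that - as in the current implementation, which passes
   j to the diagonal SeqAIJ block and oj to an m-by-N off-diagonal block - the j indices are local to the
   diagonal block while the oj indices are global):
.vb
   /* rank 0 owns global rows 0-1 and columns 0-1 of
        row 0:  1  0 | 2  0
        row 1:  0  3 | 0  4                              */
   PetscErrorCode ierr;
   PetscInt       i[]  = {0,1,2}, j[]  = {0,1};   /* "diagonal" block in CSR, local column indices      */
   PetscScalar    a[]  = {1.0,3.0};
   PetscInt       oi[] = {0,1,2}, oj[] = {2,3};   /* "off-diagonal" block in CSR, global column indices */
   PetscScalar    oa[] = {2.0,4.0};
   Mat            A;

   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
   Rank 1 would pass the analogous arrays for global rows 2-3; all six arrays must remain valid until the matrix is destroyed.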
5672 5673 .keywords: matrix, aij, compressed row, sparse, parallel 5674 5675 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5676 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5677 @*/ 5678 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5679 { 5680 PetscErrorCode ierr; 5681 Mat_MPIAIJ *maij; 5682 5683 PetscFunctionBegin; 5684 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5685 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5686 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5687 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5688 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5689 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5690 maij = (Mat_MPIAIJ*) (*mat)->data; 5691 5692 (*mat)->preallocated = PETSC_TRUE; 5693 5694 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5695 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5696 5697 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5698 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5699 5700 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5702 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5703 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5704 5705 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5706 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5707 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5708 PetscFunctionReturn(0); 5709 } 5710 5711 /* 5712 Special version for direct calls from Fortran 5713 */ 5714 #include <petsc-private/fortranimpl.h> 5715 5716 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5717 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5718 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5719 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5720 #endif 5721 5722 /* Change these macros so can be used in void function */ 5723 #undef CHKERRQ 5724 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5725 #undef SETERRQ2 5726 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5727 #undef SETERRQ3 5728 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5729 #undef SETERRQ 5730 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5731 5732 #undef __FUNCT__ 5733 #define __FUNCT__ "matsetvaluesmpiaij_" 5734 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5735 { 5736 Mat mat = *mmat; 5737 PetscInt m = *mm, n = *mn; 5738 InsertMode addv = *maddv; 5739 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5740 PetscScalar value; 5741 PetscErrorCode ierr; 5742 5743 MatCheckPreallocated(mat,1); 5744 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5745 5746 #if defined(PETSC_USE_DEBUG) 5747 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5748 #endif 5749 { 5750 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5751 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5752 PetscBool roworiented = aij->roworiented; 5753 5754 /* Some Variables required in the macro */ 5755 Mat A = aij->A; 5756 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5757 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5758 MatScalar *aa = a->a; 5759 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5760 Mat B = aij->B; 5761 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5762 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5763 MatScalar *ba = b->a; 5764 5765 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5766 PetscInt nonew = a->nonew; 5767 MatScalar *ap1,*ap2; 5768 5769 PetscFunctionBegin; 5770 for (i=0; i<m; i++) { 5771 if (im[i] < 0) continue; 5772 #if defined(PETSC_USE_DEBUG) 5773 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5774 #endif 5775 if (im[i] >= rstart && im[i] < rend) { 5776 row = im[i] - rstart; 5777 lastcol1 = -1; 5778 rp1 = aj + ai[row]; 5779 ap1 = aa + ai[row]; 5780 rmax1 = aimax[row]; 5781 nrow1 = ailen[row]; 5782 low1 = 0; 5783 high1 = nrow1; 5784 lastcol2 = -1; 5785 rp2 = bj + bi[row]; 5786 ap2 = ba + bi[row]; 5787 rmax2 = bimax[row]; 5788 nrow2 = bilen[row]; 5789 low2 = 0; 5790 high2 = nrow2; 5791 5792 for (j=0; j<n; j++) { 5793 if (roworiented) value = v[i*n+j]; 5794 else value = v[i+j*m]; 5795 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5796 if (in[j] >= cstart && in[j] < cend) { 5797 col = in[j] - cstart; 5798 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5799 } else if (in[j] < 0) continue; 5800 #if defined(PETSC_USE_DEBUG) 5801 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5802 #endif 5803 else { 5804 if (mat->was_assembled) { 5805 if (!aij->colmap) { 5806 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5807 } 5808 #if defined(PETSC_USE_CTABLE) 5809 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5810 col--; 5811 #else 5812 col = aij->colmap[in[j]] - 1; 5813 #endif 5814 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5815 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5816 col = in[j]; 5817 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5818 B = aij->B; 5819 b = (Mat_SeqAIJ*)B->data; 5820 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5821 rp2 = bj + bi[row]; 5822 ap2 = ba + bi[row]; 5823 rmax2 = bimax[row]; 5824 nrow2 = bilen[row]; 5825 low2 = 0; 5826 high2 = nrow2; 5827 bm = aij->B->rmap->n; 5828 ba = b->a; 5829 } 5830 } else col = in[j]; 5831 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5832 } 5833 } 5834 } else if (!aij->donotstash) { 5835 if (roworiented) { 5836 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5837 } else { 5838 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5839 } 5840 } 5841 } 5842 } 5843 PetscFunctionReturnVoid(); 5844 } 5845 5846