1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
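
   A rough usage sketch (the names comm, gA, A and the local row count m are
   illustrative, not taken from this file): rank 0 holds a sequential AIJ matrix gA of
   global size M x M, every rank picks its local row count m (summing to M over the
   communicator), and then all ranks call

      MatDistribute_MPIAIJ(comm,gA,m,MAT_INITIAL_MATRIX,&A);   create and fill A
      ... change numerical values of gA on rank 0 ...
      MatDistribute_MPIAIJ(comm,gA,m,MAT_REUSE_MATRIX,&A);     re-ship only the values

   With MAT_REUSE_MATRIX the nonzero pattern set up by the first call is kept and only
   the numerical values are moved over from process 0 (see the reuse branch below).
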
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 if (!rank) { 206 bses[0] = gmat->rmap->bs; 207 bses[1] = gmat->cmap->bs; 208 } 209 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 210 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 211 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 212 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 213 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 214 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 215 216 rowners[0] = 0; 217 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 218 rstart = rowners[rank]; 219 rend = rowners[rank+1]; 220 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 221 if (!rank) { 222 gmata = (Mat_SeqAIJ*) gmat->data; 223 /* send row lengths to all processors */ 224 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 225 for (i=1; i<size; i++) { 226 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 227 } 228 /* determine number diagonal and off-diagonal counts */ 229 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 230 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 231 jj = 0; 232 for (i=0; i<m; i++) { 233 for (j=0; j<dlens[i]; j++) { 234 if (gmata->j[jj] < rstart) ld[i]++; 235 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 236 jj++; 237 } 238 } 239 /* send column indices to other processes */ 240 for (i=1; i<size; i++) { 241 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 242 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 } 245 246 /* send numerical values to other processes */ 247 for (i=1; i<size; i++) { 248 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 249 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 250 } 251 gmataa = gmata->a; 252 gmataj = gmata->j; 253 254 } else { 255 /* receive row lengths */ 256 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 257 /* receive column indices */ 258 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 260 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 /* determine number diagonal and off-diagonal counts */ 262 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 264 jj = 0; 265 for (i=0; i<m; i++) { 266 for (j=0; j<dlens[i]; 
j++) { 267 if (gmataj[jj] < rstart) ld[i]++; 268 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 269 jj++; 270 } 271 } 272 /* receive numerical values */ 273 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 274 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 275 } 276 /* set preallocation */ 277 for (i=0; i<m; i++) { 278 dlens[i] -= olens[i]; 279 } 280 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 281 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 282 283 for (i=0; i<m; i++) { 284 dlens[i] += olens[i]; 285 } 286 cnt = 0; 287 for (i=0; i<m; i++) { 288 row = rstart + i; 289 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 290 cnt += dlens[i]; 291 } 292 if (rank) { 293 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 294 } 295 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 296 ierr = PetscFree(rowners);CHKERRQ(ierr); 297 298 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 299 300 *inmat = mat; 301 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 302 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 303 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 304 mat = *inmat; 305 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 306 if (!rank) { 307 /* send numerical values to other processes */ 308 gmata = (Mat_SeqAIJ*) gmat->data; 309 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 310 gmataa = gmata->a; 311 for (i=1; i<size; i++) { 312 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 313 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 314 } 315 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 316 } else { 317 /* receive numerical values from process 0*/ 318 nz = Ad->nz + Ao->nz; 319 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 320 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 321 } 322 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 323 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 324 ad = Ad->a; 325 ao = Ao->a; 326 if (mat->rmap->n) { 327 i = 0; 328 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 329 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 330 } 331 for (i=1; i<mat->rmap->n; i++) { 332 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 333 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 334 } 335 i--; 336 if (mat->rmap->n) { 337 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 338 } 339 if (rank) { 340 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 341 } 342 } 343 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 /* 349 Local utility routine that creates a mapping from the global column 350 number to the local number in the off-diagonal part of the local 351 storage of the matrix. 
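   For example (purely illustrative numbers): if the off-diagonal block B has
   garray = {3,7,12}, then global column 7 is local column 1 of B and the map
   returns 1+1 = 2; a result of 0 means that global column does not occur in B.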
When PETSC_USE_CTABLE is used this is scalable at 352 a slightly higher hash table cost; without it it is not scalable (each processor 353 has an order N integer array but is fast to acess. 354 */ 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 358 { 359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 360 PetscErrorCode ierr; 361 PetscInt n = aij->B->cmap->n,i; 362 363 PetscFunctionBegin; 364 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 365 #if defined(PETSC_USE_CTABLE) 366 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 367 for (i=0; i<n; i++) { 368 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 369 } 370 #else 371 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 372 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 373 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 374 #endif 375 PetscFunctionReturn(0); 376 } 377 378 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 379 { \ 380 if (col <= lastcol1) low1 = 0; \ 381 else high1 = nrow1; \ 382 lastcol1 = col;\ 383 while (high1-low1 > 5) { \ 384 t = (low1+high1)/2; \ 385 if (rp1[t] > col) high1 = t; \ 386 else low1 = t; \ 387 } \ 388 for (_i=low1; _i<high1; _i++) { \ 389 if (rp1[_i] > col) break; \ 390 if (rp1[_i] == col) { \ 391 if (addv == ADD_VALUES) ap1[_i] += value; \ 392 else ap1[_i] = value; \ 393 goto a_noinsert; \ 394 } \ 395 } \ 396 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 397 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 398 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 399 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 400 N = nrow1++ - 1; a->nz++; high1++; \ 401 /* shift up all the later entries in this row */ \ 402 for (ii=N; ii>=_i; ii--) { \ 403 rp1[ii+1] = rp1[ii]; \ 404 ap1[ii+1] = ap1[ii]; \ 405 } \ 406 rp1[_i] = col; \ 407 ap1[_i] = value; \ 408 a_noinsert: ; \ 409 ailen[row] = nrow1; \ 410 } 411 412 413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 414 { \ 415 if (col <= lastcol2) low2 = 0; \ 416 else high2 = nrow2; \ 417 lastcol2 = col; \ 418 while (high2-low2 > 5) { \ 419 t = (low2+high2)/2; \ 420 if (rp2[t] > col) high2 = t; \ 421 else low2 = t; \ 422 } \ 423 for (_i=low2; _i<high2; _i++) { \ 424 if (rp2[_i] > col) break; \ 425 if (rp2[_i] == col) { \ 426 if (addv == ADD_VALUES) ap2[_i] += value; \ 427 else ap2[_i] = value; \ 428 goto b_noinsert; \ 429 } \ 430 } \ 431 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 432 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 433 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 434 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 435 N = nrow2++ - 1; b->nz++; high2++; \ 436 /* shift up all the later entries in this row */ \ 437 for (ii=N; ii>=_i; ii--) { \ 438 rp2[ii+1] = rp2[ii]; \ 439 ap2[ii+1] = ap2[ii]; \ 440 } \ 441 rp2[_i] = col; \ 442 ap2[_i] = value; \ 443 b_noinsert: ; \ 444 bilen[row] = nrow2; \ 445 } 446 447 #undef __FUNCT__ 448 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 449 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat 
A,PetscInt row,const PetscScalar v[]) 450 { 451 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 452 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 453 PetscErrorCode ierr; 454 PetscInt l,*garray = mat->garray,diag; 455 456 PetscFunctionBegin; 457 /* code only works for square matrices A */ 458 459 /* find size of row to the left of the diagonal part */ 460 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 461 row = row - diag; 462 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 463 if (garray[b->j[b->i[row]+l]] > diag) break; 464 } 465 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 466 467 /* diagonal part */ 468 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 469 470 /* right of diagonal part */ 471 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatSetValues_MPIAIJ" 477 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 478 { 479 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 480 PetscScalar value; 481 PetscErrorCode ierr; 482 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 483 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 484 PetscBool roworiented = aij->roworiented; 485 486 /* Some Variables required in the macro */ 487 Mat A = aij->A; 488 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 489 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 490 MatScalar *aa = a->a; 491 PetscBool ignorezeroentries = a->ignorezeroentries; 492 Mat B = aij->B; 493 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 494 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 495 MatScalar *ba = b->a; 496 497 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 498 PetscInt nonew; 499 MatScalar *ap1,*ap2; 500 501 PetscFunctionBegin; 502 if (v) PetscValidScalarPointer(v,6); 503 for (i=0; i<m; i++) { 504 if (im[i] < 0) continue; 505 #if defined(PETSC_USE_DEBUG) 506 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 507 #endif 508 if (im[i] >= rstart && im[i] < rend) { 509 row = im[i] - rstart; 510 lastcol1 = -1; 511 rp1 = aj + ai[row]; 512 ap1 = aa + ai[row]; 513 rmax1 = aimax[row]; 514 nrow1 = ailen[row]; 515 low1 = 0; 516 high1 = nrow1; 517 lastcol2 = -1; 518 rp2 = bj + bi[row]; 519 ap2 = ba + bi[row]; 520 rmax2 = bimax[row]; 521 nrow2 = bilen[row]; 522 low2 = 0; 523 high2 = nrow2; 524 525 for (j=0; j<n; j++) { 526 if (v) { 527 if (roworiented) value = v[i*n+j]; 528 else value = v[i+j*m]; 529 } else value = 0.0; 530 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 531 if (in[j] >= cstart && in[j] < cend) { 532 col = in[j] - cstart; 533 nonew = a->nonew; 534 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 535 } else if (in[j] < 0) continue; 536 #if defined(PETSC_USE_DEBUG) 537 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 538 #endif 539 else { 540 if (mat->was_assembled) { 541 if (!aij->colmap) { 542 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 543 } 544 #if defined(PETSC_USE_CTABLE) 545 ierr = 
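              /* colmap stores local_column+1 keyed by global_column+1 (see
                 MatCreateColmap_MPIAIJ_Private()), so a return of 0 means "not present";
                 hence the +1 in the lookup and the col-- just below */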
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 546 col--; 547 #else 548 col = aij->colmap[in[j]] - 1; 549 #endif 550 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 551 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 552 col = in[j]; 553 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 554 B = aij->B; 555 b = (Mat_SeqAIJ*)B->data; 556 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 557 rp2 = bj + bi[row]; 558 ap2 = ba + bi[row]; 559 rmax2 = bimax[row]; 560 nrow2 = bilen[row]; 561 low2 = 0; 562 high2 = nrow2; 563 bm = aij->B->rmap->n; 564 ba = b->a; 565 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 566 } else col = in[j]; 567 nonew = b->nonew; 568 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 569 } 570 } 571 } else { 572 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 573 if (!aij->donotstash) { 574 mat->assembled = PETSC_FALSE; 575 if (roworiented) { 576 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 577 } else { 578 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 579 } 580 } 581 } 582 } 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatGetValues_MPIAIJ" 588 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 589 { 590 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 591 PetscErrorCode ierr; 592 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 593 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 594 595 PetscFunctionBegin; 596 for (i=0; i<m; i++) { 597 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 598 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 599 if (idxm[i] >= rstart && idxm[i] < rend) { 600 row = idxm[i] - rstart; 601 for (j=0; j<n; j++) { 602 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 603 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 604 if (idxn[j] >= cstart && idxn[j] < cend) { 605 col = idxn[j] - cstart; 606 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 607 } else { 608 if (!aij->colmap) { 609 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 610 } 611 #if defined(PETSC_USE_CTABLE) 612 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 613 col--; 614 #else 615 col = aij->colmap[idxn[j]] - 1; 616 #endif 617 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 618 else { 619 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 620 } 621 } 622 } 623 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 624 } 625 PetscFunctionReturn(0); 626 } 627 628 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 632 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 633 { 634 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 635 PetscErrorCode ierr; 636 PetscInt nstash,reallocs; 637 InsertMode addv; 638 639 PetscFunctionBegin; 640 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 641 642 /* make sure all processors are either in INSERTMODE or ADDMODE */ 643 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 644 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 645 mat->insertmode = addv; /* in case this processor had no cache */ 646 647 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 648 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 649 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 656 { 657 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 659 PetscErrorCode ierr; 660 PetscMPIInt n; 661 PetscInt i,j,rstart,ncols,flg; 662 PetscInt *row,*col; 663 PetscBool other_disassembled; 664 PetscScalar *val; 665 InsertMode addv = mat->insertmode; 666 667 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 668 669 PetscFunctionBegin; 670 if (!aij->donotstash && !mat->nooffprocentries) { 671 while (1) { 672 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 673 if (!flg) break; 674 675 for (i=0; i<n; ) { 676 /* Now identify the consecutive vals belonging to the same row */ 677 for (j=i,rstart=row[j]; j<n; j++) { 678 if (row[j] != rstart) break; 679 } 680 if (j < n) ncols = j-i; 681 else ncols = n-i; 682 /* Now assemble all these values with a single function call */ 683 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 684 685 i = j; 686 } 687 } 688 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 689 } 690 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 691 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 692 693 /* determine if any processor has disassembled, if so we must 694 also disassemble ourselfs, in order that we may reassemble. 
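      (Roughly: disassembling maps the off-diagonal block B back to global column
      numbering so that new off-process entries can be accepted again; all processes
      must make the same decision, which is why the Allreduce below is used.)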
*/ 695 /* 696 if nonzero structure of submatrix B cannot change then we know that 697 no processor disassembled thus we can skip this stuff 698 */ 699 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 700 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 701 if (mat->was_assembled && !other_disassembled) { 702 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 } 705 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 706 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 709 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 711 712 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 713 714 aij->rowvalues = 0; 715 716 /* used by MatAXPY() */ 717 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 718 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 719 720 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 721 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 727 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 728 { 729 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 730 PetscErrorCode ierr; 731 732 PetscFunctionBegin; 733 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 734 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatZeroRows_MPIAIJ" 740 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 741 { 742 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 743 PetscInt *owners = A->rmap->range; 744 PetscInt n = A->rmap->n; 745 PetscMPIInt size = mat->size; 746 PetscSF sf; 747 PetscInt *lrows; 748 PetscSFNode *rrows; 749 PetscInt lastidx = -1, r, p = 0, len = 0; 750 PetscErrorCode ierr; 751 752 PetscFunctionBegin; 753 /* Create SF where leaves are input rows and roots are owned rows */ 754 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 755 for (r = 0; r < n; ++r) lrows[r] = -1; 756 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 757 for (r = 0; r < N; ++r) { 758 const PetscInt idx = rows[r]; 759 PetscBool found = PETSC_FALSE; 760 /* Trick for efficient searching for sorted rows */ 761 if (lastidx > idx) p = 0; 762 lastidx = idx; 763 for (; p < size; ++p) { 764 if (idx >= owners[p] && idx < owners[p+1]) { 765 rrows[r].rank = p; 766 rrows[r].index = rows[r] - owners[p]; 767 found = PETSC_TRUE; 768 break; 769 } 770 } 771 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 772 } 773 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 774 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 775 /* Collect flags for rows to be zeroed */ 776 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 777 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 778 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 779 /* Compress and put in row numbers */ 780 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 781 /* fix right hand side if needed */ 782 if (x && b) { 783 const PetscScalar *xx; 784 PetscScalar *bb; 785 786 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 787 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 788 for 
(r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 789 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 790 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 791 } 792 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 793 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 794 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 795 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 796 } else if (diag != 0.0) { 797 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 798 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 799 for (r = 0; r < len; ++r) { 800 const PetscInt row = lrows[r] + A->rmap->rstart; 801 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 802 } 803 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 804 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 805 } else { 806 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 807 } 808 ierr = PetscFree(lrows);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 814 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 815 { 816 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 817 PetscErrorCode ierr; 818 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 819 PetscInt i,j,r,m,p = 0,len = 0; 820 PetscInt *lrows,*owners = A->rmap->range; 821 PetscSFNode *rrows; 822 PetscSF sf; 823 const PetscScalar *xx; 824 PetscScalar *bb,*mask; 825 Vec xmask,lmask; 826 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 827 const PetscInt *aj, *ii,*ridx; 828 PetscScalar *aa; 829 #if defined(PETSC_DEBUG) 830 PetscBool found = PETSC_FALSE; 831 #endif 832 833 PetscFunctionBegin; 834 /* Create SF where leaves are input rows and roots are owned rows */ 835 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 836 for (r = 0; r < n; ++r) lrows[r] = -1; 837 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 838 for (r = 0; r < N; ++r) { 839 const PetscInt idx = rows[r]; 840 PetscBool found = PETSC_FALSE; 841 /* Trick for efficient searching for sorted rows */ 842 if (lastidx > idx) p = 0; 843 lastidx = idx; 844 for (; p < size; ++p) { 845 if (idx >= owners[p] && idx < owners[p+1]) { 846 rrows[r].rank = p; 847 rrows[r].index = rows[r] - owners[p]; 848 found = PETSC_TRUE; 849 break; 850 } 851 } 852 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 853 } 854 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 855 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 856 /* Collect flags for rows to be zeroed */ 857 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 858 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 859 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 860 /* Compress and put in row numbers */ 861 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 862 /* zero diagonal part of matrix */ 863 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 864 /* handle off diagonal part of matrix */ 865 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 866 ierr = 
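         /* xmask is set to 1 in each locally zeroed row; scattering it forward into
            lmask marks, in the local numbering of B's columns, every column whose
            global row is being zeroed on some process */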
VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 867 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 868 for (i=0; i<len; i++) bb[lrows[i]] = 1; 869 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 870 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 873 if (x) { 874 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 876 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 877 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 878 } 879 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 880 /* remove zeroed rows of off diagonal matrix */ 881 ii = aij->i; 882 for (i=0; i<len; i++) { 883 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 884 } 885 /* loop over all elements of off process part of matrix zeroing removed columns*/ 886 if (aij->compressedrow.use) { 887 m = aij->compressedrow.nrows; 888 ii = aij->compressedrow.i; 889 ridx = aij->compressedrow.rindex; 890 for (i=0; i<m; i++) { 891 n = ii[i+1] - ii[i]; 892 aj = aij->j + ii[i]; 893 aa = aij->a + ii[i]; 894 895 for (j=0; j<n; j++) { 896 if (PetscAbsScalar(mask[*aj])) { 897 if (b) bb[*ridx] -= *aa*xx[*aj]; 898 *aa = 0.0; 899 } 900 aa++; 901 aj++; 902 } 903 ridx++; 904 } 905 } else { /* do not use compressed row format */ 906 m = l->B->rmap->n; 907 for (i=0; i<m; i++) { 908 n = ii[i+1] - ii[i]; 909 aj = aij->j + ii[i]; 910 aa = aij->a + ii[i]; 911 for (j=0; j<n; j++) { 912 if (PetscAbsScalar(mask[*aj])) { 913 if (b) bb[i] -= *aa*xx[*aj]; 914 *aa = 0.0; 915 } 916 aa++; 917 aj++; 918 } 919 } 920 } 921 if (x) { 922 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 923 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 924 } 925 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 926 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 927 ierr = PetscFree(lrows);CHKERRQ(ierr); 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatMult_MPIAIJ" 933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 934 { 935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt nt; 938 939 PetscFunctionBegin; 940 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 941 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 942 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 944 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 951 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 952 { 953 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 #undef __FUNCT__ 962 #define __FUNCT__ "MatMultAdd_MPIAIJ" 963 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 964 { 965 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 966 PetscErrorCode ierr; 967 968 PetscFunctionBegin; 969 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 970 ierr = 
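         /* overlap the ghost-value scatter started above with the purely local
            (diagonal block) product; the off-diagonal contribution is added after
            the scatter completes */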
(*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 971 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 #undef __FUNCT__ 977 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 978 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 979 { 980 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 981 PetscErrorCode ierr; 982 PetscBool merged; 983 984 PetscFunctionBegin; 985 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 986 /* do nondiagonal part */ 987 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 988 if (!merged) { 989 /* send it on its way */ 990 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 991 /* do local part */ 992 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 993 /* receive remote parts: note this assumes the values are not actually */ 994 /* added in yy until the next line, */ 995 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 996 } else { 997 /* do local part */ 998 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 999 /* send it on its way */ 1000 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1001 /* values actually were received in the Begin() but we need to call this nop */ 1002 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1003 } 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1009 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1010 { 1011 MPI_Comm comm; 1012 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1013 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1014 IS Me,Notme; 1015 PetscErrorCode ierr; 1016 PetscInt M,N,first,last,*notme,i; 1017 PetscMPIInt size; 1018 1019 PetscFunctionBegin; 1020 /* Easy test: symmetric diagonal block */ 1021 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1022 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1023 if (!*f) PetscFunctionReturn(0); 1024 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1025 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1026 if (size == 1) PetscFunctionReturn(0); 1027 1028 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
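     Each process extracts its owned rows restricted to the off-process columns of Amat
     and the off-process rows restricted to its owned columns of Bmat, and then checks
     that these two sequential blocks are transposes of each other.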
*/ 1029 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1030 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1031 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1032 for (i=0; i<first; i++) notme[i] = i; 1033 for (i=last; i<M; i++) notme[i-last+first] = i; 1034 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1035 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1036 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1037 Aoff = Aoffs[0]; 1038 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1039 Boff = Boffs[0]; 1040 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1041 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1042 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1043 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1044 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1045 ierr = PetscFree(notme);CHKERRQ(ierr); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1051 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1052 { 1053 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1054 PetscErrorCode ierr; 1055 1056 PetscFunctionBegin; 1057 /* do nondiagonal part */ 1058 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1059 /* send it on its way */ 1060 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1061 /* do local part */ 1062 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1063 /* receive remote parts */ 1064 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1065 PetscFunctionReturn(0); 1066 } 1067 1068 /* 1069 This only works correctly for square matrices where the subblock A->A is the 1070 diagonal block 1071 */ 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1074 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1075 { 1076 PetscErrorCode ierr; 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 1079 PetscFunctionBegin; 1080 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1081 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1082 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatScale_MPIAIJ" 1088 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1089 { 1090 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1091 PetscErrorCode ierr; 1092 1093 PetscFunctionBegin; 1094 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1095 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatDestroy_MPIAIJ" 1101 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1102 { 1103 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1104 PetscErrorCode ierr; 1105 1106 PetscFunctionBegin; 1107 #if defined(PETSC_USE_LOG) 1108 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1109 #endif 1110 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1111 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1112 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1113 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1114 #if defined(PETSC_USE_CTABLE) 1115 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1116 #else 1117 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1118 #endif 1119 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1120 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1121 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1122 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1123 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1124 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1125 1126 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1133 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1134 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1140 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1141 { 1142 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1143 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1144 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1145 PetscErrorCode ierr; 1146 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1147 int fd; 1148 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1149 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1150 PetscScalar *column_values; 1151 PetscInt message_count,flowcontrolcount; 1152 FILE *file; 1153 1154 PetscFunctionBegin; 1155 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1156 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1157 nz = A->nz + B->nz; 1158 if (!rank) { 1159 header[0] = MAT_FILE_CLASSID; 1160 header[1] = mat->rmap->N; 1161 header[2] = mat->cmap->N; 1162 1163 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1164 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1165 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1166 /* get largest number of rows any processor has */ 1167 rlen = mat->rmap->n; 1168 range = mat->rmap->range; 1169 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1170 } else { 1171 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1172 rlen = mat->rmap->n; 1173 } 1174 1175 /* load up the local row counts */ 1176 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1177 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1178 1179 /* store the row lengths to the file */ 1180 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1181 if (!rank) { 1182 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1183 for (i=1; i<size; i++) { 1184 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1185 rlen = range[i+1] - range[i]; 1186 ierr = 
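      /* rank 0 receives each other rank's row lengths in turn (flow controlled) and
         appends them to the binary file; the same pattern is repeated below for the
         column indices and the numerical values */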
MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1187 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1188 } 1189 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1190 } else { 1191 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1192 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1193 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1194 } 1195 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1196 1197 /* load up the local column indices */ 1198 nzmax = nz; /* th processor needs space a largest processor needs */ 1199 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1200 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1201 cnt = 0; 1202 for (i=0; i<mat->rmap->n; i++) { 1203 for (j=B->i[i]; j<B->i[i+1]; j++) { 1204 if ((col = garray[B->j[j]]) > cstart) break; 1205 column_indices[cnt++] = col; 1206 } 1207 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1208 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1209 } 1210 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1211 1212 /* store the column indices to the file */ 1213 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1214 if (!rank) { 1215 MPI_Status status; 1216 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1217 for (i=1; i<size; i++) { 1218 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1219 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1220 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1221 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1222 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1223 } 1224 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1225 } else { 1226 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1227 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1228 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1229 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1230 } 1231 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1232 1233 /* load up the local column values */ 1234 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1235 cnt = 0; 1236 for (i=0; i<mat->rmap->n; i++) { 1237 for (j=B->i[i]; j<B->i[i+1]; j++) { 1238 if (garray[B->j[j]] > cstart) break; 1239 column_values[cnt++] = B->a[j]; 1240 } 1241 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1242 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1243 } 1244 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1245 1246 /* store the column values to the file */ 1247 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1248 if (!rank) { 1249 MPI_Status status; 1250 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1251 for (i=1; i<size; i++) { 1252 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1253 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1254 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1255 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1256 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1257 } 1258 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1259 } else { 1260 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1261 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1262 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1263 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1264 } 1265 ierr = PetscFree(column_values);CHKERRQ(ierr); 1266 1267 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1268 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 #include <petscdraw.h> 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1275 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1276 { 1277 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1278 PetscErrorCode ierr; 1279 PetscMPIInt rank = aij->rank,size = aij->size; 1280 PetscBool isdraw,iascii,isbinary; 1281 PetscViewer sviewer; 1282 PetscViewerFormat format; 1283 1284 PetscFunctionBegin; 1285 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1286 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1287 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1288 if (iascii) { 1289 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1290 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1291 MatInfo info; 1292 PetscBool inodes; 1293 1294 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1295 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1296 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1297 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1298 if (!inodes) { 1299 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1300 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1301 } else { 1302 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1303 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1304 } 1305 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1306 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1307 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1308 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1309 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 
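      /* per-process (synchronized) output is complete; switch back to collective
         printing and describe the VecScatter used for matrix-vector products */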
1310 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1312 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1315 PetscInt inodecount,inodelimit,*inodes; 1316 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1317 if (inodes) { 1318 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1319 } else { 1320 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1321 } 1322 PetscFunctionReturn(0); 1323 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1324 PetscFunctionReturn(0); 1325 } 1326 } else if (isbinary) { 1327 if (size == 1) { 1328 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1329 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1330 } else { 1331 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1332 } 1333 PetscFunctionReturn(0); 1334 } else if (isdraw) { 1335 PetscDraw draw; 1336 PetscBool isnull; 1337 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1338 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1339 } 1340 1341 if (size == 1) { 1342 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1343 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1344 } else { 1345 /* assemble the entire matrix onto first processor. */ 1346 Mat A; 1347 Mat_SeqAIJ *Aloc; 1348 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1349 MatScalar *a; 1350 1351 if (mat->rmap->N > 1024) { 1352 PetscBool flg = PETSC_FALSE; 1353 1354 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1355 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1356 } 1357 1358 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1359 if (!rank) { 1360 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1361 } else { 1362 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1363 } 1364 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1365 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1366 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1367 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1368 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1369 1370 /* copy over the A part */ 1371 Aloc = (Mat_SeqAIJ*)aij->A->data; 1372 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1373 row = mat->rmap->rstart; 1374 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1375 for (i=0; i<m; i++) { 1376 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1377 row++; 1378 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1379 } 1380 aj = Aloc->j; 1381 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1382 1383 /* copy over the B part */ 1384 Aloc = (Mat_SeqAIJ*)aij->B->data; 1385 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1386 row = mat->rmap->rstart; 1387 ierr = 
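      /* B stores local column numbers; translate them through garray to global
         numbers before inserting this part into the temporary matrix */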
PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1388 ct = cols; 1389 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1390 for (i=0; i<m; i++) { 1391 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1392 row++; 1393 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1394 } 1395 ierr = PetscFree(ct);CHKERRQ(ierr); 1396 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1398 /* 1399 Everyone has to call to draw the matrix since the graphics waits are 1400 synchronized across all processors that share the PetscDraw object 1401 */ 1402 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1403 if (!rank) { 1404 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1405 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1406 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1407 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1408 } 1409 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1410 ierr = MatDestroy(&A);CHKERRQ(ierr); 1411 } 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatView_MPIAIJ" 1417 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1418 { 1419 PetscErrorCode ierr; 1420 PetscBool iascii,isdraw,issocket,isbinary; 1421 1422 PetscFunctionBegin; 1423 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1424 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1425 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1426 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1427 if (iascii || isdraw || isbinary || issocket) { 1428 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatSOR_MPIAIJ" 1435 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1436 { 1437 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1438 PetscErrorCode ierr; 1439 Vec bb1 = 0; 1440 PetscBool hasop; 1441 1442 PetscFunctionBegin; 1443 if (flag == SOR_APPLY_UPPER) { 1444 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1449 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1450 } 1451 1452 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1453 if (flag & SOR_ZERO_INITIAL_GUESS) { 1454 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1455 its--; 1456 } 1457 1458 while (its--) { 1459 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1460 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1461 1462 /* update rhs: bb1 = bb - B*x */ 1463 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1464 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1465 1466 /* local sweep */ 1467 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1468 } 1469 } else if (flag & 
SOR_LOCAL_FORWARD_SWEEP) { 1470 if (flag & SOR_ZERO_INITIAL_GUESS) { 1471 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1472 its--; 1473 } 1474 while (its--) { 1475 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1476 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1477 1478 /* update rhs: bb1 = bb - B*x */ 1479 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1480 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1481 1482 /* local sweep */ 1483 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1484 } 1485 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1486 if (flag & SOR_ZERO_INITIAL_GUESS) { 1487 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1488 its--; 1489 } 1490 while (its--) { 1491 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1492 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1493 1494 /* update rhs: bb1 = bb - B*x */ 1495 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1496 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1497 1498 /* local sweep */ 1499 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1500 } 1501 } else if (flag & SOR_EISENSTAT) { 1502 Vec xx1; 1503 1504 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1505 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1506 1507 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1508 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1509 if (!mat->diag) { 1510 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1511 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1512 } 1513 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1514 if (hasop) { 1515 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1516 } else { 1517 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1518 } 1519 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1520 1521 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1522 1523 /* local sweep */ 1524 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1525 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1526 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1527 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1528 1529 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatPermute_MPIAIJ" 1535 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1536 { 1537 Mat aA,aB,Aperm; 1538 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1539 PetscScalar *aa,*ba; 1540 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1541 PetscSF rowsf,sf; 1542 IS parcolp = NULL; 1543 PetscBool done; 1544 PetscErrorCode ierr; 1545 1546 PetscFunctionBegin; 1547 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1548 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1549 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1550 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1551 1552 
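/*
   Sketch of the strategy used below (explanatory note, with a made-up two-process example):
   rowp lists, for each new global row, the old global row it takes its entries from. Building a
   PetscSF whose leaves are the local rows and whose roots are the requested old rows lets
   PetscSFReduce() with MPIU_REPLACE push each leaf's new global index back to the owner of the
   old row, so rdest[i] ends up holding the new global position of local row i (and likewise
   cdest for columns). For instance, with rows {0,1} on rank 0, rows {2,3} on rank 1 and
   rowp = {2,3,0,1}, rank 0 obtains rdest = {2,3} and rank 1 obtains rdest = {0,1}.
*/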
/* Invert row permutation to find out where my rows should go */ 1553 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1554 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1555 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1556 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1557 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1558 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1559 1560 /* Invert column permutation to find out where my columns should go */ 1561 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1562 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1563 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1564 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1565 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1566 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1567 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1568 1569 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1570 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1571 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1572 1573 /* Find out where my gcols should go */ 1574 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1575 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1576 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1577 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1578 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1579 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1580 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1581 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1582 1583 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1584 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1585 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1586 for (i=0; i<m; i++) { 1587 PetscInt row = rdest[i],rowner; 1588 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1589 for (j=ai[i]; j<ai[i+1]; j++) { 1590 PetscInt cowner,col = cdest[aj[j]]; 1591 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1592 if (rowner == cowner) dnnz[i]++; 1593 else onnz[i]++; 1594 } 1595 for (j=bi[i]; j<bi[i+1]; j++) { 1596 PetscInt cowner,col = gcdest[bj[j]]; 1597 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1598 if (rowner == cowner) dnnz[i]++; 1599 else onnz[i]++; 1600 } 1601 } 1602 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1603 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1604 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1605 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1606 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1607 1608 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1609 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1610 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1611 for (i=0; i<m; i++) { 1612 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1613 PetscInt j0,rowlen; 1614 rowlen = ai[i+1] - ai[i]; 1615 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be 
larger than number of rows m, so sum in batches */ 1616 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1617 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1618 } 1619 rowlen = bi[i+1] - bi[i]; 1620 for (j0=j=0; j<rowlen; j0=j) { 1621 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1622 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1623 } 1624 } 1625 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1626 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1627 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1628 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1629 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1630 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1631 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1632 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1633 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1634 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1635 *B = Aperm; 1636 PetscFunctionReturn(0); 1637 } 1638 1639 #undef __FUNCT__ 1640 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1641 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1642 { 1643 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1644 Mat A = mat->A,B = mat->B; 1645 PetscErrorCode ierr; 1646 PetscReal isend[5],irecv[5]; 1647 1648 PetscFunctionBegin; 1649 info->block_size = 1.0; 1650 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1651 1652 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1653 isend[3] = info->memory; isend[4] = info->mallocs; 1654 1655 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1656 1657 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1658 isend[3] += info->memory; isend[4] += info->mallocs; 1659 if (flag == MAT_LOCAL) { 1660 info->nz_used = isend[0]; 1661 info->nz_allocated = isend[1]; 1662 info->nz_unneeded = isend[2]; 1663 info->memory = isend[3]; 1664 info->mallocs = isend[4]; 1665 } else if (flag == MAT_GLOBAL_MAX) { 1666 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1667 1668 info->nz_used = irecv[0]; 1669 info->nz_allocated = irecv[1]; 1670 info->nz_unneeded = irecv[2]; 1671 info->memory = irecv[3]; 1672 info->mallocs = irecv[4]; 1673 } else if (flag == MAT_GLOBAL_SUM) { 1674 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1675 1676 info->nz_used = irecv[0]; 1677 info->nz_allocated = irecv[1]; 1678 info->nz_unneeded = irecv[2]; 1679 info->memory = irecv[3]; 1680 info->mallocs = irecv[4]; 1681 } 1682 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1683 info->fill_ratio_needed = 0; 1684 info->factor_mallocs = 0; 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatSetOption_MPIAIJ" 1690 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1691 { 1692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1693 PetscErrorCode ierr; 1694 1695 PetscFunctionBegin; 1696 switch (op) { 1697 case MAT_NEW_NONZERO_LOCATIONS: 1698 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1699 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1700 case MAT_KEEP_NONZERO_PATTERN: 1701 case MAT_NEW_NONZERO_LOCATION_ERR: 1702 case MAT_USE_INODES: 1703 case MAT_IGNORE_ZERO_ENTRIES: 1704 MatCheckPreallocated(A,1); 
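/* The options in this group of cases are purely local properties of the stored blocks, so they
   are simply forwarded to both the on-diagonal (a->A) and off-diagonal (a->B) SeqAIJ matrices
   below. Typical caller usage (a sketch):
      ierr = MatSetOption(A,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
   where A is the parallel MPIAIJ matrix. */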
1705 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1706 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1707 break; 1708 case MAT_ROW_ORIENTED: 1709 a->roworiented = flg; 1710 1711 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1712 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1713 break; 1714 case MAT_NEW_DIAGONALS: 1715 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1716 break; 1717 case MAT_IGNORE_OFF_PROC_ENTRIES: 1718 a->donotstash = flg; 1719 break; 1720 case MAT_SPD: 1721 A->spd_set = PETSC_TRUE; 1722 A->spd = flg; 1723 if (flg) { 1724 A->symmetric = PETSC_TRUE; 1725 A->structurally_symmetric = PETSC_TRUE; 1726 A->symmetric_set = PETSC_TRUE; 1727 A->structurally_symmetric_set = PETSC_TRUE; 1728 } 1729 break; 1730 case MAT_SYMMETRIC: 1731 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1732 break; 1733 case MAT_STRUCTURALLY_SYMMETRIC: 1734 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1735 break; 1736 case MAT_HERMITIAN: 1737 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1738 break; 1739 case MAT_SYMMETRY_ETERNAL: 1740 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1741 break; 1742 default: 1743 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1744 } 1745 PetscFunctionReturn(0); 1746 } 1747 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatGetRow_MPIAIJ" 1750 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1751 { 1752 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1753 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1754 PetscErrorCode ierr; 1755 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1756 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1757 PetscInt *cmap,*idx_p; 1758 1759 PetscFunctionBegin; 1760 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1761 mat->getrowactive = PETSC_TRUE; 1762 1763 if (!mat->rowvalues && (idx || v)) { 1764 /* 1765 allocate enough space to hold information from the longest row. 
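      The workspace must hold a fully merged row, i.e. the maximum over the local rows of the
      number of nonzeros stored in the diagonal block plus those stored in the off-diagonal
      block, since the values and global column indices of both parts are returned as a single
      column-sorted row; the loop below computes that maximum.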
1766 */ 1767 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1768 PetscInt max = 1,tmp; 1769 for (i=0; i<matin->rmap->n; i++) { 1770 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1771 if (max < tmp) max = tmp; 1772 } 1773 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1774 } 1775 1776 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1777 lrow = row - rstart; 1778 1779 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1780 if (!v) {pvA = 0; pvB = 0;} 1781 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1782 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1783 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1784 nztot = nzA + nzB; 1785 1786 cmap = mat->garray; 1787 if (v || idx) { 1788 if (nztot) { 1789 /* Sort by increasing column numbers, assuming A and B already sorted */ 1790 PetscInt imark = -1; 1791 if (v) { 1792 *v = v_p = mat->rowvalues; 1793 for (i=0; i<nzB; i++) { 1794 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1795 else break; 1796 } 1797 imark = i; 1798 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1799 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1800 } 1801 if (idx) { 1802 *idx = idx_p = mat->rowindices; 1803 if (imark > -1) { 1804 for (i=0; i<imark; i++) { 1805 idx_p[i] = cmap[cworkB[i]]; 1806 } 1807 } else { 1808 for (i=0; i<nzB; i++) { 1809 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1810 else break; 1811 } 1812 imark = i; 1813 } 1814 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1815 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1816 } 1817 } else { 1818 if (idx) *idx = 0; 1819 if (v) *v = 0; 1820 } 1821 } 1822 *nz = nztot; 1823 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1824 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1830 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1831 { 1832 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1833 1834 PetscFunctionBegin; 1835 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1836 aij->getrowactive = PETSC_FALSE; 1837 PetscFunctionReturn(0); 1838 } 1839 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatNorm_MPIAIJ" 1842 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1843 { 1844 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1845 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1846 PetscErrorCode ierr; 1847 PetscInt i,j,cstart = mat->cmap->rstart; 1848 PetscReal sum = 0.0; 1849 MatScalar *v; 1850 1851 PetscFunctionBegin; 1852 if (aij->size == 1) { 1853 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1854 } else { 1855 if (type == NORM_FROBENIUS) { 1856 v = amat->a; 1857 for (i=0; i<amat->nz; i++) { 1858 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1859 } 1860 v = bmat->a; 1861 for (i=0; i<bmat->nz; i++) { 1862 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1863 } 1864 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1865 *norm = PetscSqrtReal(*norm); 1866 } else if (type == NORM_1) { /* max column norm */ 1867 PetscReal *tmp,*tmp2; 1868 PetscInt *jj,*garray = aij->garray; 1869 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1870 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1871 *norm = 0.0; 1872 v = amat->a; jj = amat->j; 1873 for (j=0; j<amat->nz; j++) { 1874 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1875 } 1876 v = bmat->a; jj = bmat->j; 1877 for (j=0; j<bmat->nz; j++) { 1878 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1879 } 1880 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1881 for (j=0; j<mat->cmap->N; j++) { 1882 if (tmp2[j] > *norm) *norm = tmp2[j]; 1883 } 1884 ierr = PetscFree(tmp);CHKERRQ(ierr); 1885 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1886 } else if (type == NORM_INFINITY) { /* max row norm */ 1887 PetscReal ntemp = 0.0; 1888 for (j=0; j<aij->A->rmap->n; j++) { 1889 v = amat->a + amat->i[j]; 1890 sum = 0.0; 1891 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1892 sum += PetscAbsScalar(*v); v++; 1893 } 1894 v = bmat->a + bmat->i[j]; 1895 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1896 sum += PetscAbsScalar(*v); v++; 1897 } 1898 if (sum > ntemp) ntemp = sum; 1899 } 1900 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1901 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1902 } 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #undef __FUNCT__ 1907 #define __FUNCT__ "MatTranspose_MPIAIJ" 1908 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1909 { 1910 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1911 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1912 PetscErrorCode ierr; 1913 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1914 PetscInt cstart = A->cmap->rstart,ncol; 1915 Mat B; 1916 MatScalar *array; 1917 1918 PetscFunctionBegin; 1919 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1920 1921 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1922 ai = Aloc->i; aj = Aloc->j; 1923 bi = Bloc->i; bj = Bloc->j; 1924 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1925 PetscInt *d_nnz,*g_nnz,*o_nnz; 1926 PetscSFNode *oloc; 1927 PETSC_UNUSED PetscSF sf; 1928 1929 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1930 /* compute d_nnz for preallocation */ 1931 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1932 for (i=0; i<ai[ma]; i++) { 1933 d_nnz[aj[i]]++; 1934 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1935 } 1936 /* compute local off-diagonal contributions */ 1937 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1938 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1939 /* map those to global */ 1940 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1941 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1942 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1943 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1944 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1945 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1947 1948 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1949 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1950 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 1951 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1952 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1953 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1954 } else { 1955 B = *matout; 1956 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1957 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1958 } 1959 1960 /* copy over the A part */ 1961 array = Aloc->a; 1962 row = A->rmap->rstart; 1963 for (i=0; i<ma; i++) { 1964 ncol = ai[i+1]-ai[i]; 1965 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1966 row++; 1967 array += ncol; aj += ncol; 1968 } 1969 aj = Aloc->j; 1970 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1971 1972 /* copy over the B part */ 1973 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1974 array = Bloc->a; 1975 row = A->rmap->rstart; 1976 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1977 cols_tmp = cols; 1978 for (i=0; i<mb; i++) { 1979 ncol = bi[i+1]-bi[i]; 1980 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1981 row++; 1982 array += ncol; cols_tmp += ncol; 1983 } 1984 ierr = PetscFree(cols);CHKERRQ(ierr); 1985 1986 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1987 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1989 *matout = B; 1990 } else { 1991 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1992 } 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1998 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1999 { 2000 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2001 Mat a = aij->A,b = aij->B; 2002 PetscErrorCode ierr; 2003 PetscInt s1,s2,s3; 2004 2005 PetscFunctionBegin; 2006 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2007 if (rr) { 2008 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2009 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2010 /* Overlap communication with computation. 
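         The scatter of the right-scaling vector rr into the ghosted work vector lvec is only
         started here; the left scaling of the off-diagonal block and the scaling of the diagonal
         block proceed while those messages are in flight, and the scatter is completed just
         before the columns of the off-diagonal block are scaled with lvec.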
*/ 2011 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2012 } 2013 if (ll) { 2014 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2015 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2016 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2017 } 2018 /* scale the diagonal block */ 2019 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2020 2021 if (rr) { 2022 /* Do a scatter end and then right scale the off-diagonal block */ 2023 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2024 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2025 } 2026 PetscFunctionReturn(0); 2027 } 2028 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2031 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2032 { 2033 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2034 PetscErrorCode ierr; 2035 2036 PetscFunctionBegin; 2037 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2038 PetscFunctionReturn(0); 2039 } 2040 2041 #undef __FUNCT__ 2042 #define __FUNCT__ "MatEqual_MPIAIJ" 2043 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2044 { 2045 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2046 Mat a,b,c,d; 2047 PetscBool flg; 2048 PetscErrorCode ierr; 2049 2050 PetscFunctionBegin; 2051 a = matA->A; b = matA->B; 2052 c = matB->A; d = matB->B; 2053 2054 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2055 if (flg) { 2056 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2057 } 2058 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2059 PetscFunctionReturn(0); 2060 } 2061 2062 #undef __FUNCT__ 2063 #define __FUNCT__ "MatCopy_MPIAIJ" 2064 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2065 { 2066 PetscErrorCode ierr; 2067 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2068 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2069 2070 PetscFunctionBegin; 2071 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2072 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2073 /* because of the column compression in the off-processor part of the matrix a->B, 2074 the number of columns in a->B and b->B may be different, hence we cannot call 2075 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2076 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2077 then copying the submatrices */ 2078 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2079 } else { 2080 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2081 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2082 } 2083 PetscFunctionReturn(0); 2084 } 2085 2086 #undef __FUNCT__ 2087 #define __FUNCT__ "MatSetUp_MPIAIJ" 2088 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2089 { 2090 PetscErrorCode ierr; 2091 2092 PetscFunctionBegin; 2093 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2094 PetscFunctionReturn(0); 2095 } 2096 2097 #undef __FUNCT__ 2098 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2099 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2100 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2101 { 2102 PetscInt i,m=Y->rmap->N; 2103 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2104 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2105 const PetscInt *xi = x->i,*yi = y->i; 2106 2107 PetscFunctionBegin; 2108 /* Set the number of nonzeros in the new matrix */ 2109 for (i=0; i<m; i++) { 2110 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2111 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2112 nnz[i] = 0; 2113 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2114 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2115 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2116 nnz[i]++; 2117 } 2118 for (; k<nzy; k++) nnz[i]++; 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatAXPY_MPIAIJ" 2125 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2126 { 2127 PetscErrorCode ierr; 2128 PetscInt i; 2129 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2130 PetscBLASInt bnz,one=1; 2131 Mat_SeqAIJ *x,*y; 2132 2133 PetscFunctionBegin; 2134 if (str == SAME_NONZERO_PATTERN) { 2135 PetscScalar alpha = a; 2136 x = (Mat_SeqAIJ*)xx->A->data; 2137 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2138 y = (Mat_SeqAIJ*)yy->A->data; 2139 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2140 x = (Mat_SeqAIJ*)xx->B->data; 2141 y = (Mat_SeqAIJ*)yy->B->data; 2142 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2143 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2144 } else if (str == SUBSET_NONZERO_PATTERN) { 2145 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2146 2147 x = (Mat_SeqAIJ*)xx->B->data; 2148 y = (Mat_SeqAIJ*)yy->B->data; 2149 if (y->xtoy && y->XtoY != xx->B) { 2150 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2151 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2152 } 2153 if (!y->xtoy) { /* get xtoy */ 2154 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2155 y->XtoY = xx->B; 2156 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2157 } 2158 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2159 } else { 2160 Mat B; 2161 PetscInt *nnz_d,*nnz_o; 2162 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2163 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2164 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2165 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2166 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2167 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2168 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2169 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2170 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2171 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2172 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2173 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2174 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2175 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2176 } 2177 PetscFunctionReturn(0); 2178 } 2179 2180 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2181 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "MatConjugate_MPIAIJ" 2184 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2185 { 2186 #if defined(PETSC_USE_COMPLEX) 2187 PetscErrorCode ierr; 2188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2189 2190 PetscFunctionBegin; 2191 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2192 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2193 #else 2194 PetscFunctionBegin; 2195 #endif 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatRealPart_MPIAIJ" 2201 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2208 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2214 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2215 { 2216 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2217 PetscErrorCode ierr; 2218 2219 PetscFunctionBegin; 2220 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2221 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 #if defined(PETSC_HAVE_PBGL) 2226 2227 #include <boost/parallel/mpi/bsp_process_group.hpp> 2228 #include <boost/graph/distributed/ilu_default_graph.hpp> 2229 #include <boost/graph/distributed/ilu_0_block.hpp> 2230 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2231 #include <boost/graph/distributed/petsc/interface.hpp> 2232 #include <boost/multi_array.hpp> 2233 #include <boost/parallel/distributed_property_map->hpp> 2234 2235 #undef __FUNCT__ 2236 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2237 /* 2238 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2239 */ 2240 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2241 { 2242 namespace petsc = boost::distributed::petsc; 2243 2244 namespace graph_dist = boost::graph::distributed; 2245 using boost::graph::distributed::ilu_default::process_group_type; 2246 using boost::graph::ilu_permuted; 2247 2248 PetscBool row_identity, col_identity; 2249 PetscContainer c; 2250 PetscInt m, n, M, N; 2251 PetscErrorCode ierr; 2252 2253 PetscFunctionBegin; 2254 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2255 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2256 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2257 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2258 2259 process_group_type pg; 2260 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2261 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2262 lgraph_type& level_graph = *lgraph_p; 2263 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2264 2265 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2266 ilu_permuted(level_graph); 2267 2268 /* put together the new matrix */ 2269 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2270 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2271 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2272 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2273 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2274 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2275 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2276 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2277 2278 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2279 ierr = PetscContainerSetPointer(c, lgraph_p); 2280 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2281 ierr = PetscContainerDestroy(&c); 2282 PetscFunctionReturn(0); 2283 } 2284 2285 #undef __FUNCT__ 2286 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2287 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2288 { 2289 PetscFunctionBegin; 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatSolve_MPIAIJ" 2295 /* 2296 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2297 */ 2298 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2299 { 2300 namespace graph_dist = boost::graph::distributed; 2301 2302 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2303 lgraph_type *lgraph_p; 2304 PetscContainer c; 2305 PetscErrorCode ierr; 2306 2307 PetscFunctionBegin; 2308 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2309 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2310 ierr = VecCopy(b, x);CHKERRQ(ierr); 2311 2312 PetscScalar *array_x; 2313 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2314 PetscInt sx; 2315 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2316 2317 PetscScalar *array_b; 2318 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2319 PetscInt sb; 2320 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2321 2322 lgraph_type& level_graph = *lgraph_p; 2323 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2324 2325 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2326 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2327 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2328 2329 typedef boost::iterator_property_map<array_ref_type::iterator, 2330 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2331 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2332 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2333 2334 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2335 PetscFunctionReturn(0); 2336 } 2337 #endif 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatDestroy_MatRedundant" 2341 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2342 { 2343 PetscErrorCode ierr; 2344 Mat_Redundant *redund; 2345 PetscInt i; 2346 PetscMPIInt size; 2347 2348 PetscFunctionBegin; 2349 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2350 if (size == 1) { 2351 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2352 redund = 
a->redundant; 2353 } else { 2354 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2355 redund = a->redundant; 2356 } 2357 if (redund){ 2358 if (redund->matseq) { /* via MatGetSubMatrices() */ 2359 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2360 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2361 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2362 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2363 } else { 2364 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2365 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2366 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2367 for (i=0; i<redund->nrecvs; i++) { 2368 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2369 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2370 } 2371 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2372 } 2373 2374 if (redund->psubcomm) { 2375 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2376 } 2377 ierr = redund->Destroy(A);CHKERRQ(ierr); 2378 ierr = PetscFree(redund);CHKERRQ(ierr); 2379 } 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2385 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2386 { 2387 PetscMPIInt rank,size; 2388 MPI_Comm comm; 2389 PetscErrorCode ierr; 2390 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2391 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2392 PetscInt *rowrange = mat->rmap->range; 2393 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2394 Mat A = aij->A,B=aij->B,C=*matredundant; 2395 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2396 PetscScalar *sbuf_a; 2397 PetscInt nzlocal=a->nz+b->nz; 2398 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2399 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2400 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2401 MatScalar *aworkA,*aworkB; 2402 PetscScalar *vals; 2403 PetscMPIInt tag1,tag2,tag3,imdex; 2404 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2405 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2406 MPI_Status recv_status,*send_status; 2407 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2408 PetscInt **rbuf_j=NULL; 2409 PetscScalar **rbuf_a=NULL; 2410 Mat_Redundant *redund =NULL; 2411 2412 PetscFunctionBegin; 2413 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2414 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2415 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2416 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2417 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2418 2419 if (reuse == MAT_REUSE_MATRIX) { 2420 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2421 if (subsize == 1) { 2422 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2423 redund = c->redundant; 2424 } else { 2425 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2426 redund = c->redundant; 2427 } 2428 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2429 2430 nsends = redund->nsends; 2431 nrecvs = redund->nrecvs; 2432 send_rank = redund->send_rank; 2433 recv_rank = redund->recv_rank; 2434 sbuf_nz = redund->sbuf_nz; 2435 rbuf_nz = redund->rbuf_nz; 2436 sbuf_j = redund->sbuf_j; 2437 sbuf_a = redund->sbuf_a; 2438 rbuf_j = redund->rbuf_j; 2439 rbuf_a = redund->rbuf_a; 2440 } 2441 2442 if (reuse == MAT_INITIAL_MATRIX) { 2443 PetscInt nleftover,np_subcomm; 2444 2445 /* get the destination processors' id send_rank, nsends and nrecvs */ 2446 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2447 2448 np_subcomm = size/nsubcomm; 2449 nleftover = size - nsubcomm*np_subcomm; 2450 2451 /* block of codes below is specific for INTERLACED */ 2452 /* ------------------------------------------------*/ 2453 nsends = 0; nrecvs = 0; 2454 for (i=0; i<size; i++) { 2455 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2456 send_rank[nsends++] = i; 2457 recv_rank[nrecvs++] = i; 2458 } 2459 } 2460 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2461 i = size-nleftover-1; 2462 j = 0; 2463 while (j < nsubcomm - nleftover) { 2464 send_rank[nsends++] = i; 2465 i--; j++; 2466 } 2467 } 2468 2469 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2470 for (i=0; i<nleftover; i++) { 2471 recv_rank[nrecvs++] = size-nleftover+i; 2472 } 2473 } 2474 /*----------------------------------------------*/ 2475 2476 /* allocate sbuf_j, sbuf_a */ 2477 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2478 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2479 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2480 /* 2481 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2482 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2483 */ 2484 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2485 2486 /* copy mat's local entries into the buffers */ 2487 if (reuse == MAT_INITIAL_MATRIX) { 2488 rownz_max = 0; 2489 rptr = sbuf_j; 2490 cols = sbuf_j + rend-rstart + 1; 2491 vals = sbuf_a; 2492 rptr[0] = 0; 2493 for (i=0; i<rend-rstart; i++) { 2494 row = i + rstart; 2495 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2496 ncols = nzA + nzB; 2497 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2498 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2499 /* load the column indices for this row into cols */ 2500 lwrite = 0; 2501 for (l=0; l<nzB; l++) { 2502 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2503 vals[lwrite] = aworkB[l]; 2504 cols[lwrite++] = ctmp; 2505 } 2506 } 2507 for (l=0; l<nzA; l++) { 2508 vals[lwrite] = aworkA[l]; 2509 cols[lwrite++] = cstart + cworkA[l]; 2510 } 2511 for (l=0; l<nzB; l++) { 2512 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2513 vals[lwrite] = aworkB[l]; 2514 cols[lwrite++] = ctmp; 2515 } 2516 } 2517 vals += ncols; 2518 cols += ncols; 2519 rptr[i+1] = rptr[i] + ncols; 2520 if (rownz_max < ncols) rownz_max = ncols; 2521 } 2522 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2523 } else { /* only copy matrix values into sbuf_a */ 2524 rptr = sbuf_j; 2525 vals = sbuf_a; 2526 rptr[0] = 0; 2527 for (i=0; i<rend-rstart; i++) { 2528 row = i + rstart; 2529 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2530 ncols = nzA + nzB; 2531 cworkB = b->j + b->i[i]; 2532 aworkA = a->a + a->i[i]; 2533 aworkB = b->a + b->i[i]; 2534 lwrite = 0; 2535 for (l=0; 
l<nzB; l++) { 2536 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2537 } 2538 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2539 for (l=0; l<nzB; l++) { 2540 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2541 } 2542 vals += ncols; 2543 rptr[i+1] = rptr[i] + ncols; 2544 } 2545 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2546 2547 /* send nzlocal to others, and recv other's nzlocal */ 2548 /*--------------------------------------------------*/ 2549 if (reuse == MAT_INITIAL_MATRIX) { 2550 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2551 2552 s_waits2 = s_waits3 + nsends; 2553 s_waits1 = s_waits2 + nsends; 2554 r_waits1 = s_waits1 + nsends; 2555 r_waits2 = r_waits1 + nrecvs; 2556 r_waits3 = r_waits2 + nrecvs; 2557 } else { 2558 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2559 2560 r_waits3 = s_waits3 + nsends; 2561 } 2562 2563 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2564 if (reuse == MAT_INITIAL_MATRIX) { 2565 /* get new tags to keep the communication clean */ 2566 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2567 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2568 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2569 2570 /* post receives of other's nzlocal */ 2571 for (i=0; i<nrecvs; i++) { 2572 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2573 } 2574 /* send nzlocal to others */ 2575 for (i=0; i<nsends; i++) { 2576 sbuf_nz[i] = nzlocal; 2577 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2578 } 2579 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2580 count = nrecvs; 2581 while (count) { 2582 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2583 2584 recv_rank[imdex] = recv_status.MPI_SOURCE; 2585 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2586 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2587 2588 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2589 2590 rbuf_nz[imdex] += i + 2; 2591 2592 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2593 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2594 count--; 2595 } 2596 /* wait on sends of nzlocal */ 2597 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2598 /* send mat->i,j to others, and recv from other's */ 2599 /*------------------------------------------------*/ 2600 for (i=0; i<nsends; i++) { 2601 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2602 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2603 } 2604 /* wait on receives of mat->i,j */ 2605 /*------------------------------*/ 2606 count = nrecvs; 2607 while (count) { 2608 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2609 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2610 count--; 2611 } 2612 /* wait on sends of mat->i,j */ 2613 /*---------------------------*/ 2614 if (nsends) { 2615 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2616 } 2617 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2618 2619 /* post 
receives, send and receive mat->a */ 2620 /*----------------------------------------*/ 2621 for (imdex=0; imdex<nrecvs; imdex++) { 2622 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2623 } 2624 for (i=0; i<nsends; i++) { 2625 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2626 } 2627 count = nrecvs; 2628 while (count) { 2629 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2630 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2631 count--; 2632 } 2633 if (nsends) { 2634 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2635 } 2636 2637 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2638 2639 /* create redundant matrix */ 2640 /*-------------------------*/ 2641 if (reuse == MAT_INITIAL_MATRIX) { 2642 const PetscInt *range; 2643 PetscInt rstart_sub,rend_sub,mloc_sub; 2644 2645 /* compute rownz_max for preallocation */ 2646 for (imdex=0; imdex<nrecvs; imdex++) { 2647 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2648 rptr = rbuf_j[imdex]; 2649 for (i=0; i<j; i++) { 2650 ncols = rptr[i+1] - rptr[i]; 2651 if (rownz_max < ncols) rownz_max = ncols; 2652 } 2653 } 2654 2655 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2656 2657 /* get local size of redundant matrix 2658 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2659 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2660 rstart_sub = range[nsubcomm*subrank]; 2661 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2662 rend_sub = range[nsubcomm*(subrank+1)]; 2663 } else { 2664 rend_sub = mat->rmap->N; 2665 } 2666 mloc_sub = rend_sub - rstart_sub; 2667 2668 if (M == N) { 2669 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2670 } else { /* non-square matrix */ 2671 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2672 } 2673 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2674 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2675 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2676 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2677 } else { 2678 C = *matredundant; 2679 } 2680 2681 /* insert local matrix entries */ 2682 rptr = sbuf_j; 2683 cols = sbuf_j + rend-rstart + 1; 2684 vals = sbuf_a; 2685 for (i=0; i<rend-rstart; i++) { 2686 row = i + rstart; 2687 ncols = rptr[i+1] - rptr[i]; 2688 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2689 vals += ncols; 2690 cols += ncols; 2691 } 2692 /* insert received matrix entries */ 2693 for (imdex=0; imdex<nrecvs; imdex++) { 2694 rstart = rowrange[recv_rank[imdex]]; 2695 rend = rowrange[recv_rank[imdex]+1]; 2696 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2697 rptr = rbuf_j[imdex]; 2698 cols = rbuf_j[imdex] + rend-rstart + 1; 2699 vals = rbuf_a[imdex]; 2700 for (i=0; i<rend-rstart; i++) { 2701 row = i + rstart; 2702 ncols = rptr[i+1] - rptr[i]; 2703 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2704 vals += ncols; 2705 cols += ncols; 2706 } 2707 } 2708 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2709 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2710 2711 if (reuse == MAT_INITIAL_MATRIX) { 2712 *matredundant = C; 2713 2714 /* 
create a supporting struct and attach it to C for reuse */ 2715 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2716 if (subsize == 1) { 2717 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2718 c->redundant = redund; 2719 } else { 2720 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2721 c->redundant = redund; 2722 } 2723 2724 redund->nzlocal = nzlocal; 2725 redund->nsends = nsends; 2726 redund->nrecvs = nrecvs; 2727 redund->send_rank = send_rank; 2728 redund->recv_rank = recv_rank; 2729 redund->sbuf_nz = sbuf_nz; 2730 redund->rbuf_nz = rbuf_nz; 2731 redund->sbuf_j = sbuf_j; 2732 redund->sbuf_a = sbuf_a; 2733 redund->rbuf_j = rbuf_j; 2734 redund->rbuf_a = rbuf_a; 2735 redund->psubcomm = NULL; 2736 2737 redund->Destroy = C->ops->destroy; 2738 C->ops->destroy = MatDestroy_MatRedundant; 2739 } 2740 PetscFunctionReturn(0); 2741 } 2742 2743 #undef __FUNCT__ 2744 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2745 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2746 { 2747 PetscErrorCode ierr; 2748 MPI_Comm comm; 2749 PetscMPIInt size,subsize; 2750 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2751 Mat_Redundant *redund=NULL; 2752 PetscSubcomm psubcomm=NULL; 2753 MPI_Comm subcomm_in=subcomm; 2754 Mat *matseq; 2755 IS isrow,iscol; 2756 2757 PetscFunctionBegin; 2758 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2759 if (reuse == MAT_INITIAL_MATRIX) { 2760 /* create psubcomm, then get subcomm */ 2761 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2762 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2763 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2764 2765 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2766 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2767 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2768 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2769 subcomm = psubcomm->comm; 2770 } else { /* retrieve psubcomm and subcomm */ 2771 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2772 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2773 if (subsize == 1) { 2774 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2775 redund = c->redundant; 2776 } else { 2777 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2778 redund = c->redundant; 2779 } 2780 psubcomm = redund->psubcomm; 2781 } 2782 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2783 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2784 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2785 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2786 if (subsize == 1) { 2787 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2788 c->redundant->psubcomm = psubcomm; 2789 } else { 2790 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2791 c->redundant->psubcomm = psubcomm ; 2792 } 2793 } 2794 PetscFunctionReturn(0); 2795 } 2796 } 2797 2798 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2799 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2800 if (reuse == MAT_INITIAL_MATRIX) { 2801 /* create a local sequential matrix matseq[0] */ 2802 mloc_sub = PETSC_DECIDE; 2803 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2804 ierr = 
MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2805 rstart = rend - mloc_sub; 2806 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2807 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2808 } else { /* reuse == MAT_REUSE_MATRIX */ 2809 if (subsize == 1) { 2810 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2811 redund = c->redundant; 2812 } else { 2813 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2814 redund = c->redundant; 2815 } 2816 2817 isrow = redund->isrow; 2818 iscol = redund->iscol; 2819 matseq = redund->matseq; 2820 } 2821 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2822 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2823 2824 if (reuse == MAT_INITIAL_MATRIX) { 2825 /* create a supporting struct and attach it to C for reuse */ 2826 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2827 if (subsize == 1) { 2828 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2829 c->redundant = redund; 2830 } else { 2831 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2832 c->redundant = redund; 2833 } 2834 redund->isrow = isrow; 2835 redund->iscol = iscol; 2836 redund->matseq = matseq; 2837 redund->psubcomm = psubcomm; 2838 redund->Destroy = (*matredundant)->ops->destroy; 2839 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2840 } 2841 PetscFunctionReturn(0); 2842 } 2843 2844 #undef __FUNCT__ 2845 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2846 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2847 { 2848 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2849 PetscErrorCode ierr; 2850 PetscInt i,*idxb = 0; 2851 PetscScalar *va,*vb; 2852 Vec vtmp; 2853 2854 PetscFunctionBegin; 2855 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2856 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2857 if (idx) { 2858 for (i=0; i<A->rmap->n; i++) { 2859 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2860 } 2861 } 2862 2863 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2864 if (idx) { 2865 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2866 } 2867 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2868 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2869 2870 for (i=0; i<A->rmap->n; i++) { 2871 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2872 va[i] = vb[i]; 2873 if (idx) idx[i] = a->garray[idxb[i]]; 2874 } 2875 } 2876 2877 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2878 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2879 ierr = PetscFree(idxb);CHKERRQ(ierr); 2880 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2881 PetscFunctionReturn(0); 2882 } 2883 2884 #undef __FUNCT__ 2885 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2886 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2887 { 2888 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2889 PetscErrorCode ierr; 2890 PetscInt i,*idxb = 0; 2891 PetscScalar *va,*vb; 2892 Vec vtmp; 2893 2894 PetscFunctionBegin; 2895 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2896 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2897 if (idx) { 2898 for (i=0; i<A->cmap->n; i++) { 2899 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2900 } 2901 } 2902 2903 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2904 if (idx) { 2905 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2906 } 2907 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2908 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2909 2910 for (i=0; i<A->rmap->n; i++) 
{ 2911 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2912 va[i] = vb[i]; 2913 if (idx) idx[i] = a->garray[idxb[i]]; 2914 } 2915 } 2916 2917 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2918 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2919 ierr = PetscFree(idxb);CHKERRQ(ierr); 2920 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2921 PetscFunctionReturn(0); 2922 } 2923 2924 #undef __FUNCT__ 2925 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2926 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2927 { 2928 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2929 PetscInt n = A->rmap->n; 2930 PetscInt cstart = A->cmap->rstart; 2931 PetscInt *cmap = mat->garray; 2932 PetscInt *diagIdx, *offdiagIdx; 2933 Vec diagV, offdiagV; 2934 PetscScalar *a, *diagA, *offdiagA; 2935 PetscInt r; 2936 PetscErrorCode ierr; 2937 2938 PetscFunctionBegin; 2939 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2940 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2941 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2942 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2943 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2944 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2945 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2946 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2947 for (r = 0; r < n; ++r) { 2948 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2949 a[r] = diagA[r]; 2950 idx[r] = cstart + diagIdx[r]; 2951 } else { 2952 a[r] = offdiagA[r]; 2953 idx[r] = cmap[offdiagIdx[r]]; 2954 } 2955 } 2956 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2957 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2958 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2959 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2960 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2961 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2962 PetscFunctionReturn(0); 2963 } 2964 2965 #undef __FUNCT__ 2966 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2967 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2968 { 2969 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2970 PetscInt n = A->rmap->n; 2971 PetscInt cstart = A->cmap->rstart; 2972 PetscInt *cmap = mat->garray; 2973 PetscInt *diagIdx, *offdiagIdx; 2974 Vec diagV, offdiagV; 2975 PetscScalar *a, *diagA, *offdiagA; 2976 PetscInt r; 2977 PetscErrorCode ierr; 2978 2979 PetscFunctionBegin; 2980 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2981 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2982 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2983 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2984 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2985 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2986 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2987 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2988 for (r = 0; r < n; ++r) { 2989 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2990 a[r] = diagA[r]; 2991 idx[r] = cstart + diagIdx[r]; 2992 } else { 2993 a[r] = offdiagA[r]; 2994 idx[r] = cmap[offdiagIdx[r]]; 2995 } 2996 } 2997 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2998 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2999 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3000 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3001 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3002 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3003 
PetscFunctionReturn(0); 3004 } 3005 3006 #undef __FUNCT__ 3007 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3008 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3009 { 3010 PetscErrorCode ierr; 3011 Mat *dummy; 3012 3013 PetscFunctionBegin; 3014 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3015 *newmat = *dummy; 3016 ierr = PetscFree(dummy);CHKERRQ(ierr); 3017 PetscFunctionReturn(0); 3018 } 3019 3020 #undef __FUNCT__ 3021 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3022 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3023 { 3024 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3025 PetscErrorCode ierr; 3026 3027 PetscFunctionBegin; 3028 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3029 PetscFunctionReturn(0); 3030 } 3031 3032 #undef __FUNCT__ 3033 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3034 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3035 { 3036 PetscErrorCode ierr; 3037 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3038 3039 PetscFunctionBegin; 3040 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3041 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3042 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3043 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3044 PetscFunctionReturn(0); 3045 } 3046 3047 /* -------------------------------------------------------------------*/ 3048 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3049 MatGetRow_MPIAIJ, 3050 MatRestoreRow_MPIAIJ, 3051 MatMult_MPIAIJ, 3052 /* 4*/ MatMultAdd_MPIAIJ, 3053 MatMultTranspose_MPIAIJ, 3054 MatMultTransposeAdd_MPIAIJ, 3055 #if defined(PETSC_HAVE_PBGL) 3056 MatSolve_MPIAIJ, 3057 #else 3058 0, 3059 #endif 3060 0, 3061 0, 3062 /*10*/ 0, 3063 0, 3064 0, 3065 MatSOR_MPIAIJ, 3066 MatTranspose_MPIAIJ, 3067 /*15*/ MatGetInfo_MPIAIJ, 3068 MatEqual_MPIAIJ, 3069 MatGetDiagonal_MPIAIJ, 3070 MatDiagonalScale_MPIAIJ, 3071 MatNorm_MPIAIJ, 3072 /*20*/ MatAssemblyBegin_MPIAIJ, 3073 MatAssemblyEnd_MPIAIJ, 3074 MatSetOption_MPIAIJ, 3075 MatZeroEntries_MPIAIJ, 3076 /*24*/ MatZeroRows_MPIAIJ, 3077 0, 3078 #if defined(PETSC_HAVE_PBGL) 3079 0, 3080 #else 3081 0, 3082 #endif 3083 0, 3084 0, 3085 /*29*/ MatSetUp_MPIAIJ, 3086 #if defined(PETSC_HAVE_PBGL) 3087 0, 3088 #else 3089 0, 3090 #endif 3091 0, 3092 0, 3093 0, 3094 /*34*/ MatDuplicate_MPIAIJ, 3095 0, 3096 0, 3097 0, 3098 0, 3099 /*39*/ MatAXPY_MPIAIJ, 3100 MatGetSubMatrices_MPIAIJ, 3101 MatIncreaseOverlap_MPIAIJ, 3102 MatGetValues_MPIAIJ, 3103 MatCopy_MPIAIJ, 3104 /*44*/ MatGetRowMax_MPIAIJ, 3105 MatScale_MPIAIJ, 3106 0, 3107 0, 3108 MatZeroRowsColumns_MPIAIJ, 3109 /*49*/ MatSetRandom_MPIAIJ, 3110 0, 3111 0, 3112 0, 3113 0, 3114 /*54*/ MatFDColoringCreate_MPIXAIJ, 3115 0, 3116 MatSetUnfactored_MPIAIJ, 3117 MatPermute_MPIAIJ, 3118 0, 3119 /*59*/ MatGetSubMatrix_MPIAIJ, 3120 MatDestroy_MPIAIJ, 3121 MatView_MPIAIJ, 3122 0, 3123 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3124 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3125 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3126 0, 3127 0, 3128 0, 3129 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3130 MatGetRowMinAbs_MPIAIJ, 3131 0, 3132 MatSetColoring_MPIAIJ, 3133 0, 3134 MatSetValuesAdifor_MPIAIJ, 3135 /*75*/ MatFDColoringApply_AIJ, 3136 0, 3137 0, 3138 0, 3139 MatFindZeroDiagonals_MPIAIJ, 3140 /*80*/ 0, 3141 0, 3142 0, 3143 /*83*/ MatLoad_MPIAIJ, 3144 0, 3145 0, 3146 0, 3147 0, 3148 0, 3149 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3150 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3151 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3152 MatPtAP_MPIAIJ_MPIAIJ, 3153 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3154 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3155 0, 3156 0, 3157 0, 3158 0, 3159 /*99*/ 0, 3160 0, 3161 0, 3162 MatConjugate_MPIAIJ, 3163 0, 3164 /*104*/MatSetValuesRow_MPIAIJ, 3165 MatRealPart_MPIAIJ, 3166 MatImaginaryPart_MPIAIJ, 3167 0, 3168 0, 3169 /*109*/0, 3170 MatGetRedundantMatrix_MPIAIJ, 3171 MatGetRowMin_MPIAIJ, 3172 0, 3173 0, 3174 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3175 0, 3176 0, 3177 0, 3178 0, 3179 /*119*/0, 3180 0, 3181 0, 3182 0, 3183 MatGetMultiProcBlock_MPIAIJ, 3184 /*124*/MatFindNonzeroRows_MPIAIJ, 3185 MatGetColumnNorms_MPIAIJ, 3186 MatInvertBlockDiagonal_MPIAIJ, 3187 0, 3188 MatGetSubMatricesParallel_MPIAIJ, 3189 /*129*/0, 3190 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3191 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3192 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3193 0, 3194 /*134*/0, 3195 0, 3196 0, 3197 0, 3198 0, 3199 /*139*/0, 3200 0, 3201 0, 3202 MatFDColoringSetUp_MPIXAIJ 3203 }; 3204 3205 /* ----------------------------------------------------------------------------------------*/ 3206 3207 #undef __FUNCT__ 3208 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3209 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3210 { 3211 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3212 PetscErrorCode ierr; 3213 3214 PetscFunctionBegin; 3215 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3216 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3217 PetscFunctionReturn(0); 3218 } 3219 3220 #undef __FUNCT__ 3221 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3222 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3223 { 3224 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3225 PetscErrorCode ierr; 3226 3227 PetscFunctionBegin; 3228 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3229 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3230 PetscFunctionReturn(0); 3231 } 3232 3233 #undef __FUNCT__ 3234 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3235 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3236 { 3237 Mat_MPIAIJ *b; 3238 PetscErrorCode ierr; 3239 3240 PetscFunctionBegin; 3241 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3242 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3243 b = (Mat_MPIAIJ*)B->data; 3244 3245 if (!B->preallocated) { 3246 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3247 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3248 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3249 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3250 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3251 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3252 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3253 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3254 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3255 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3256 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3257 } 3258 3259 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3260 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3261 B->preallocated = PETSC_TRUE; 3262 PetscFunctionReturn(0); 3263 } 3264 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3267 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3268 { 3269 Mat mat; 3270 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3271 PetscErrorCode ierr; 3272 3273 PetscFunctionBegin; 3274 *newmat = 0; 3275 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3276 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3277 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3278 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3279 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3280 a = (Mat_MPIAIJ*)mat->data; 3281 3282 mat->factortype = matin->factortype; 3283 mat->rmap->bs = matin->rmap->bs; 3284 mat->cmap->bs = matin->cmap->bs; 3285 mat->assembled = PETSC_TRUE; 3286 mat->insertmode = NOT_SET_VALUES; 3287 mat->preallocated = PETSC_TRUE; 3288 3289 a->size = oldmat->size; 3290 a->rank = oldmat->rank; 3291 a->donotstash = oldmat->donotstash; 3292 a->roworiented = oldmat->roworiented; 3293 a->rowindices = 0; 3294 a->rowvalues = 0; 3295 a->getrowactive = PETSC_FALSE; 3296 3297 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3298 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3299 3300 if (oldmat->colmap) { 3301 #if defined(PETSC_USE_CTABLE) 3302 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3303 #else 3304 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3305 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3306 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3307 #endif 3308 } else a->colmap = 0; 3309 if (oldmat->garray) { 3310 PetscInt len; 3311 len = oldmat->B->cmap->n; 3312 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3313 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3314 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3315 } else a->garray = 0; 3316 3317 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3318 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3319 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3320 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3321 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3322 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3323 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3324 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3325 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3326 *newmat = mat; 3327 PetscFunctionReturn(0); 3328 } 3329 3330 3331 3332 #undef __FUNCT__ 3333 #define __FUNCT__ "MatLoad_MPIAIJ" 3334 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3335 { 3336 PetscScalar *vals,*svals; 3337 MPI_Comm comm; 3338 PetscErrorCode ierr; 3339 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3340 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3341 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3342 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3343 PetscInt cend,cstart,n,*rowners,sizesset=1; 3344 int fd; 3345 PetscInt bs = 1; 3346 3347 PetscFunctionBegin; 3348 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3349 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3350 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3351 if (!rank) { 3352 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3353 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3354 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3355 } 3356 3357 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3358 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3359 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3360 3361 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3362 3363 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3364 M = header[1]; N = header[2]; 3365 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3366 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3367 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3368 3369 /* If global sizes are set, check if they are consistent with that given in the file */ 3370 if (sizesset) { 3371 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3372 } 3373 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3374 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3375 3376 /* determine ownership of all (block) rows */ 3377 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3378 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3379 else m = newMat->rmap->n; /* Set by user */ 3380 3381 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3382 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3383 3384 /* First process needs enough room for process with most rows */ 3385 if (!rank) { 3386 mmax = rowners[1]; 3387 for (i=2; i<=size; i++) { 3388 mmax = PetscMax(mmax, rowners[i]); 3389 } 3390 } else mmax = -1; /* unused, but compilers complain */ 3391 3392 rowners[0] = 0; 3393 for (i=2; i<=size; i++) { 3394 rowners[i] += rowners[i-1]; 3395 } 3396 
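  /* rowners[] now holds the prefix sums of the local row counts: process p owns global rows rowners[p] up to (but not including) rowners[p+1] */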
rstart = rowners[rank]; 3397 rend = rowners[rank+1]; 3398 3399 /* distribute row lengths to all processors */ 3400 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3401 if (!rank) { 3402 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3403 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3404 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3405 for (j=0; j<m; j++) { 3406 procsnz[0] += ourlens[j]; 3407 } 3408 for (i=1; i<size; i++) { 3409 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3410 /* calculate the number of nonzeros on each processor */ 3411 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3412 procsnz[i] += rowlengths[j]; 3413 } 3414 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3415 } 3416 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3417 } else { 3418 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3419 } 3420 3421 if (!rank) { 3422 /* determine max buffer needed and allocate it */ 3423 maxnz = 0; 3424 for (i=0; i<size; i++) { 3425 maxnz = PetscMax(maxnz,procsnz[i]); 3426 } 3427 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3428 3429 /* read in my part of the matrix column indices */ 3430 nz = procsnz[0]; 3431 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3432 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3433 3434 /* read in every one elses and ship off */ 3435 for (i=1; i<size; i++) { 3436 nz = procsnz[i]; 3437 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3438 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3439 } 3440 ierr = PetscFree(cols);CHKERRQ(ierr); 3441 } else { 3442 /* determine buffer space needed for message */ 3443 nz = 0; 3444 for (i=0; i<m; i++) { 3445 nz += ourlens[i]; 3446 } 3447 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3448 3449 /* receive message of column indices*/ 3450 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3451 } 3452 3453 /* determine column ownership if matrix is not square */ 3454 if (N != M) { 3455 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3456 else n = newMat->cmap->n; 3457 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3458 cstart = cend - n; 3459 } else { 3460 cstart = rstart; 3461 cend = rend; 3462 n = cend - cstart; 3463 } 3464 3465 /* loop over local rows, determining number of off diagonal entries */ 3466 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3467 jj = 0; 3468 for (i=0; i<m; i++) { 3469 for (j=0; j<ourlens[i]; j++) { 3470 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3471 jj++; 3472 } 3473 } 3474 3475 for (i=0; i<m; i++) { 3476 ourlens[i] -= offlens[i]; 3477 } 3478 if (!sizesset) { 3479 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3480 } 3481 3482 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3483 3484 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3485 3486 for (i=0; i<m; i++) { 3487 ourlens[i] += offlens[i]; 3488 } 3489 3490 if (!rank) { 3491 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3492 3493 /* read in my part of the matrix numerical values */ 3494 nz = procsnz[0]; 3495 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3496 3497 /* insert into matrix */ 3498 jj = rstart; 3499 smycols = mycols; 3500 svals = vals; 3501 for (i=0; i<m; i++) { 3502 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3503 smycols += ourlens[i]; 3504 svals += 
ourlens[i]; 3505 jj++; 3506 } 3507 3508 /* read in other processors and ship out */ 3509 for (i=1; i<size; i++) { 3510 nz = procsnz[i]; 3511 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3512 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3513 } 3514 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3515 } else { 3516 /* receive numeric values */ 3517 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3518 3519 /* receive message of values*/ 3520 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3521 3522 /* insert into matrix */ 3523 jj = rstart; 3524 smycols = mycols; 3525 svals = vals; 3526 for (i=0; i<m; i++) { 3527 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3528 smycols += ourlens[i]; 3529 svals += ourlens[i]; 3530 jj++; 3531 } 3532 } 3533 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3534 ierr = PetscFree(vals);CHKERRQ(ierr); 3535 ierr = PetscFree(mycols);CHKERRQ(ierr); 3536 ierr = PetscFree(rowners);CHKERRQ(ierr); 3537 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3538 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3539 PetscFunctionReturn(0); 3540 } 3541 3542 #undef __FUNCT__ 3543 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3544 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3545 { 3546 PetscErrorCode ierr; 3547 IS iscol_local; 3548 PetscInt csize; 3549 3550 PetscFunctionBegin; 3551 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3552 if (call == MAT_REUSE_MATRIX) { 3553 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3554 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3555 } else { 3556 PetscInt cbs; 3557 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3558 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3559 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3560 } 3561 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3562 if (call == MAT_INITIAL_MATRIX) { 3563 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3564 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3565 } 3566 PetscFunctionReturn(0); 3567 } 3568 3569 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3570 #undef __FUNCT__ 3571 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3572 /* 3573 Not great since it makes two copies of the submatrix, first an SeqAIJ 3574 in local and then by concatenating the local matrices the end result. 3575 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3576 3577 Note: This requires a sequential iscol with all indices. 
3578 */ 3579 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3580 { 3581 PetscErrorCode ierr; 3582 PetscMPIInt rank,size; 3583 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3584 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3585 PetscBool allcolumns, colflag; 3586 Mat M,Mreuse; 3587 MatScalar *vwork,*aa; 3588 MPI_Comm comm; 3589 Mat_SeqAIJ *aij; 3590 3591 PetscFunctionBegin; 3592 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3593 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3594 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3595 3596 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3597 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3598 if (colflag && ncol == mat->cmap->N) { 3599 allcolumns = PETSC_TRUE; 3600 } else { 3601 allcolumns = PETSC_FALSE; 3602 } 3603 if (call == MAT_REUSE_MATRIX) { 3604 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3605 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3606 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3607 } else { 3608 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3609 } 3610 3611 /* 3612 m - number of local rows 3613 n - number of columns (same on all processors) 3614 rstart - first row in new global matrix generated 3615 */ 3616 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3617 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3618 if (call == MAT_INITIAL_MATRIX) { 3619 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3620 ii = aij->i; 3621 jj = aij->j; 3622 3623 /* 3624 Determine the number of non-zeros in the diagonal and off-diagonal 3625 portions of the matrix in order to do correct preallocation 3626 */ 3627 3628 /* first get start and end of "diagonal" columns */ 3629 if (csize == PETSC_DECIDE) { 3630 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3631 if (mglobal == n) { /* square matrix */ 3632 nlocal = m; 3633 } else { 3634 nlocal = n/size + ((n % size) > rank); 3635 } 3636 } else { 3637 nlocal = csize; 3638 } 3639 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3640 rstart = rend - nlocal; 3641 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3642 3643 /* next, compute all the lengths */ 3644 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3645 olens = dlens + m; 3646 for (i=0; i<m; i++) { 3647 jend = ii[i+1] - ii[i]; 3648 olen = 0; 3649 dlen = 0; 3650 for (j=0; j<jend; j++) { 3651 if (*jj < rstart || *jj >= rend) olen++; 3652 else dlen++; 3653 jj++; 3654 } 3655 olens[i] = olen; 3656 dlens[i] = dlen; 3657 } 3658 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3659 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3660 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3661 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3662 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3663 ierr = PetscFree(dlens);CHKERRQ(ierr); 3664 } else { 3665 PetscInt ml,nl; 3666 3667 M = *newmat; 3668 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3669 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3670 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3671 /* 3672 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3673 rather than the slower MatSetValues(). 3674 */ 3675 M->was_assembled = PETSC_TRUE; 3676 M->assembled = PETSC_FALSE; 3677 } 3678 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3679 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3680 ii = aij->i; 3681 jj = aij->j; 3682 aa = aij->a; 3683 for (i=0; i<m; i++) { 3684 row = rstart + i; 3685 nz = ii[i+1] - ii[i]; 3686 cwork = jj; jj += nz; 3687 vwork = aa; aa += nz; 3688 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3689 } 3690 3691 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3692 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3693 *newmat = M; 3694 3695 /* save submatrix used in processor for next request */ 3696 if (call == MAT_INITIAL_MATRIX) { 3697 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3698 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3699 } 3700 PetscFunctionReturn(0); 3701 } 3702 3703 #undef __FUNCT__ 3704 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3705 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3706 { 3707 PetscInt m,cstart, cend,j,nnz,i,d; 3708 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3709 const PetscInt *JJ; 3710 PetscScalar *values; 3711 PetscErrorCode ierr; 3712 3713 PetscFunctionBegin; 3714 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3715 3716 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3717 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3718 m = B->rmap->n; 3719 cstart = B->cmap->rstart; 3720 cend = B->cmap->rend; 3721 rstart = B->rmap->rstart; 3722 3723 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3724 3725 #if defined(PETSC_USE_DEBUG) 3726 for (i=0; i<m; i++) { 3727 nnz = Ii[i+1]- Ii[i]; 3728 JJ = J + Ii[i]; 3729 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3730 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3731 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3732 } 3733 #endif 3734 3735 for (i=0; i<m; i++) { 3736 nnz = Ii[i+1]- Ii[i]; 3737 JJ = J + Ii[i]; 3738 nnz_max = PetscMax(nnz_max,nnz); 3739 d = 0; 3740 for (j=0; j<nnz; j++) { 3741 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3742 } 3743 d_nnz[i] = d; 3744 o_nnz[i] = nnz - d; 3745 } 3746 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3747 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3748 3749 if (v) values = (PetscScalar*)v; 3750 else { 3751 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3752 } 3753 3754 for (i=0; i<m; i++) { 3755 ii = i + rstart; 3756 nnz = Ii[i+1]- Ii[i]; 3757 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3758 } 3759 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3760 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3761 3762 if (!v) { 3763 ierr = PetscFree(values);CHKERRQ(ierr); 3764 } 3765 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3766 PetscFunctionReturn(0); 3767 } 3768 3769 #undef __FUNCT__ 3770 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3771 /*@ 3772 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3773 (the default parallel PETSc format). 3774 3775 Collective on MPI_Comm 3776 3777 Input Parameters: 3778 + B - the matrix 3779 . i - the indices into j for the start of each local row (starts with zero) 3780 . j - the column indices for each local row (starts with zero) 3781 - v - optional values in the matrix 3782 3783 Level: developer 3784 3785 Notes: 3786 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3787 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3788 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3789 3790 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3791 3792 The format used for the sparse matrix input is equivalent to a 3793 row-major ordering, i.e., for the following matrix, the input data expected is 3794 as shown: 3795 3796 1 0 0 3797 2 0 3 P0 3798 ------- 3799 4 5 6 P1 3800 3801 Process0 [P0]: rows_owned=[0,1] 3802 i = {0,1,3} [size = nrow+1 = 2+1] 3803 j = {0,0,2} [size = nz = 3] 3804 v = {1,2,3} [size = nz = 3] 3805 3806 Process1 [P1]: rows_owned=[2] 3807 i = {0,3} [size = nrow+1 = 1+1] 3808 j = {0,1,2} [size = nz = 3] 3809 v = {4,5,6} [size = nz = 3] 3810 3811 .keywords: matrix, aij, compressed row, sparse, parallel 3812 3813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3814 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3815 @*/ 3816 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3817 { 3818 PetscErrorCode ierr; 3819 3820 PetscFunctionBegin; 3821 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3822 PetscFunctionReturn(0); 3823 } 3824 3825 #undef __FUNCT__ 3826 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3827 /*@C 3828 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3829 (the default parallel PETSc format). For good matrix assembly performance 3830 the user should preallocate the matrix storage by setting the parameters 3831 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3832 performance can be increased by more than a factor of 50. 3833 3834 Collective on MPI_Comm 3835 3836 Input Parameters: 3837 + B - the matrix 3838 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3839 (same value is used for all local rows) 3840 . d_nnz - array containing the number of nonzeros in the various rows of the 3841 DIAGONAL portion of the local submatrix (possibly different for each row) 3842 or NULL, if d_nz is used to specify the nonzero structure. 3843 The size of this array is equal to the number of local rows, i.e., 'm'.
3844 For matrices that will be factored, you must leave room for (and set) 3845 the diagonal entry even if it is zero. 3846 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3847 submatrix (same value is used for all local rows). 3848 - o_nnz - array containing the number of nonzeros in the various rows of the 3849 OFF-DIAGONAL portion of the local submatrix (possibly different for 3850 each row) or NULL, if o_nz is used to specify the nonzero 3851 structure. The size of this array is equal to the number 3852 of local rows, i.e., 'm'. 3853 3854 If the *_nnz parameter is given then the *_nz parameter is ignored. 3855 3856 The AIJ format (also called the Yale sparse matrix format or 3857 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3858 storage. The stored row and column indices begin with zero. 3859 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3860 3861 The parallel matrix is partitioned such that the first m0 rows belong to 3862 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3863 to process 2 etc., where m0,m1,m2,... are the input parameter 'm'. 3864 3865 The DIAGONAL portion of the local submatrix of a processor can be defined 3866 as the submatrix which is obtained by extracting the part corresponding to 3867 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3868 first row that belongs to the processor, r2 is the last row belonging to 3869 this processor, and c1-c2 is the range of indices of the local part of a 3870 vector suitable for applying the matrix to. This is an mxn matrix. In the 3871 common case of a square matrix, the row and column ranges are the same and 3872 the DIAGONAL part is also square. The remaining portion of the local 3873 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3874 3875 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3876 3877 You can call MatGetInfo() to get information on how effective the preallocation was; 3878 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3879 You can also run with the option -info and look for messages with the string 3880 malloc in them to see if additional memory allocation was needed. 3881 3882 Example usage: 3883 3884 Consider the following 8x8 matrix with 34 non-zero values, that is 3885 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3886 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3887 as follows: 3888 3889 .vb 3890 1 2 0 | 0 3 0 | 0 4 3891 Proc0 0 5 6 | 7 0 0 | 8 0 3892 9 0 10 | 11 0 0 | 12 0 3893 ------------------------------------- 3894 13 0 14 | 15 16 17 | 0 0 3895 Proc1 0 18 0 | 19 20 21 | 0 0 3896 0 0 0 | 22 23 0 | 24 0 3897 ------------------------------------- 3898 Proc2 25 26 27 | 0 0 28 | 29 0 3899 30 0 0 | 31 32 33 | 0 34 3900 .ve 3901 3902 This can be represented as a collection of submatrices as: 3903 3904 .vb 3905 A B C 3906 D E F 3907 G H I 3908 .ve 3909 3910 Where the submatrices A,B,C are owned by proc0, D,E,F are 3911 owned by proc1, G,H,I are owned by proc2. 3912 3913 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3914 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3915 The 'M','N' parameters are 8,8, and have the same values on all procs. 3916 3917 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3918 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices 3919 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3920 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3921 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 3922 matrix, and [DF] as another SeqAIJ matrix. 3923 3924 When d_nz, o_nz parameters are specified, d_nz storage elements are 3925 allocated for every row of the local diagonal submatrix, and o_nz 3926 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3927 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3928 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3929 In this case, the values of d_nz,o_nz are: 3930 .vb 3931 proc0 : d_nz = 2, o_nz = 2 3932 proc1 : d_nz = 3, o_nz = 2 3933 proc2 : d_nz = 1, o_nz = 4 3934 .ve 3935 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3936 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3937 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3938 34 values. 3939 3940 When d_nnz, o_nnz parameters are specified, the storage is specified 3941 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3942 In the above case the values for d_nnz,o_nnz are: 3943 .vb 3944 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3945 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3946 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3947 .ve 3948 Here the space allocated is the sum of all the above values, i.e., 34, and 3949 hence pre-allocation is perfect. 3950 3951 Level: intermediate 3952 3953 .keywords: matrix, aij, compressed row, sparse, parallel 3954 3955 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3956 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3957 @*/ 3958 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3959 { 3960 PetscErrorCode ierr; 3961 3962 PetscFunctionBegin; 3963 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3964 PetscValidType(B,1); 3965 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3966 PetscFunctionReturn(0); 3967 } 3968 3969 #undef __FUNCT__ 3970 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3971 /*@ 3972 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 3973 in standard CSR format. 3974 3975 Collective on MPI_Comm 3976 3977 Input Parameters: 3978 + comm - MPI communicator 3979 . m - number of local rows (Cannot be PETSC_DECIDE) 3980 . n - This value should be the same as the local size used in creating the 3981 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3982 calculated if N is given) For square matrices n is almost always m. 3983 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3984 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3985 . i - row indices 3986 . j - column indices 3987 - a - matrix values 3988 3989 Output Parameter: 3990 . mat - the matrix 3991 3992 Level: intermediate 3993 3994 Notes: 3995 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3996 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3997 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
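   For instance, a typical call looks like the following sketch (illustrative only; m, N, i, j, and a are the parameters described above, and the remaining sizes are left for PETSc to determine):
.vb
      Mat A;
      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,&A);
.ve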
3998 3999 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4000 4001 The format which is used for the sparse matrix input, is equivalent to a 4002 row-major ordering.. i.e for the following matrix, the input data expected is 4003 as shown: 4004 4005 1 0 0 4006 2 0 3 P0 4007 ------- 4008 4 5 6 P1 4009 4010 Process0 [P0]: rows_owned=[0,1] 4011 i = {0,1,3} [size = nrow+1 = 2+1] 4012 j = {0,0,2} [size = nz = 6] 4013 v = {1,2,3} [size = nz = 6] 4014 4015 Process1 [P1]: rows_owned=[2] 4016 i = {0,3} [size = nrow+1 = 1+1] 4017 j = {0,1,2} [size = nz = 6] 4018 v = {4,5,6} [size = nz = 6] 4019 4020 .keywords: matrix, aij, compressed row, sparse, parallel 4021 4022 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4023 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4024 @*/ 4025 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4026 { 4027 PetscErrorCode ierr; 4028 4029 PetscFunctionBegin; 4030 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4031 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4032 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4033 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4034 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4035 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4036 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4037 PetscFunctionReturn(0); 4038 } 4039 4040 #undef __FUNCT__ 4041 #define __FUNCT__ "MatCreateAIJ" 4042 /*@C 4043 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4044 (the default parallel PETSc format). For good matrix assembly performance 4045 the user should preallocate the matrix storage by setting the parameters 4046 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4047 performance can be increased by more than a factor of 50. 4048 4049 Collective on MPI_Comm 4050 4051 Input Parameters: 4052 + comm - MPI communicator 4053 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4054 This value should be the same as the local size used in creating the 4055 y vector for the matrix-vector product y = Ax. 4056 . n - This value should be the same as the local size used in creating the 4057 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4058 calculated if N is given) For square matrices n is almost always m. 4059 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4060 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4061 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4062 (same value is used for all local rows) 4063 . d_nnz - array containing the number of nonzeros in the various rows of the 4064 DIAGONAL portion of the local submatrix (possibly different for each row) 4065 or NULL, if d_nz is used to specify the nonzero structure. 4066 The size of this array is equal to the number of local rows, i.e 'm'. 4067 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4068 submatrix (same value is used for all local rows). 
4069 - o_nnz - array containing the number of nonzeros in the various rows of the 4070 OFF-DIAGONAL portion of the local submatrix (possibly different for 4071 each row) or NULL, if o_nz is used to specify the nonzero 4072 structure. The size of this array is equal to the number 4073 of local rows, i.e., 'm'. 4074 4075 Output Parameter: 4076 . A - the matrix 4077 4078 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4079 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4080 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4081 4082 Notes: 4083 If the *_nnz parameter is given then the *_nz parameter is ignored. 4084 4085 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4086 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4087 storage requirements for this matrix. 4088 4089 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4090 processor then it must be used on all processors that share the object for 4091 that argument. 4092 4093 The user MUST specify either the local or global matrix dimensions 4094 (possibly both). 4095 4096 The parallel matrix is partitioned across processors such that the 4097 first m0 rows belong to process 0, the next m1 rows belong to 4098 process 1, the next m2 rows belong to process 2 etc., where 4099 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores 4100 values corresponding to an [m x N] submatrix. 4101 4102 The columns are logically partitioned with the n0 columns belonging 4103 to the 0th partition, the next n1 columns belonging to the next 4104 partition etc., where n0,n1,n2,... are the input parameter 'n'. 4105 4106 The DIAGONAL portion of the local submatrix on any given processor 4107 is the submatrix corresponding to the rows and columns m,n 4108 corresponding to the given processor, i.e., the diagonal matrix on 4109 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 4110 etc. The remaining portion of the local submatrix [m x (N-n)] 4111 constitutes the OFF-DIAGONAL portion. The example below better 4112 illustrates this concept. 4113 4114 For a square global matrix we define each processor's diagonal portion 4115 to be its local rows and the corresponding columns (a square submatrix); 4116 each processor's off-diagonal portion encompasses the remainder of the 4117 local matrix (a rectangular submatrix). 4118 4119 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4120 4121 When calling this routine with a single process communicator, a matrix of 4122 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4123 type of communicator, use the construction mechanism: 4124 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4125 4126 By default, this format uses inodes (identical nodes) when possible. 4127 We search for consecutive rows with the same nonzero structure, thereby 4128 reusing matrix information to achieve increased efficiency. 4129 4130 Options Database Keys: 4131 + -mat_no_inode - Do not use inodes 4132 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4133 - -mat_aij_oneindex - Internally use indexing starting at 1 4134 rather than 0. Note that when calling MatSetValues(), 4135 the user still MUST index entries starting at 0! 4136 4137 4138 Example usage: 4139 4140 Consider the following 8x8 matrix with 34 non-zero values, that is 4141 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 4142 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4143 as follows: 4144 4145 .vb 4146 1 2 0 | 0 3 0 | 0 4 4147 Proc0 0 5 6 | 7 0 0 | 8 0 4148 9 0 10 | 11 0 0 | 12 0 4149 ------------------------------------- 4150 13 0 14 | 15 16 17 | 0 0 4151 Proc1 0 18 0 | 19 20 21 | 0 0 4152 0 0 0 | 22 23 0 | 24 0 4153 ------------------------------------- 4154 Proc2 25 26 27 | 0 0 28 | 29 0 4155 30 0 0 | 31 32 33 | 0 34 4156 .ve 4157 4158 This can be represented as a collection of submatrices as: 4159 4160 .vb 4161 A B C 4162 D E F 4163 G H I 4164 .ve 4165 4166 Where the submatrices A,B,C are owned by proc0, D,E,F are 4167 owned by proc1, G,H,I are owned by proc2. 4168 4169 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4170 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4171 The 'M','N' parameters are 8,8, and have the same values on all procs. 4172 4173 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4174 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4175 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4176 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4177 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 4178 matrix, and [DF] as another SeqAIJ matrix. 4179 4180 When d_nz, o_nz parameters are specified, d_nz storage elements are 4181 allocated for every row of the local diagonal submatrix, and o_nz 4182 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4183 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4184 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4185 In this case, the values of d_nz,o_nz are: 4186 .vb 4187 proc0 : d_nz = 2, o_nz = 2 4188 proc1 : d_nz = 3, o_nz = 2 4189 proc2 : d_nz = 1, o_nz = 4 4190 .ve 4191 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4192 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4193 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4194 34 values. 4195 4196 When d_nnz, o_nnz parameters are specified, the storage is specified 4197 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4198 In the above case the values for d_nnz,o_nnz are: 4199 .vb 4200 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4201 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4202 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4203 .ve 4204 Here the space allocated is the sum of all the above values, i.e., 34, and 4205 hence pre-allocation is perfect.
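   For the example above, the call on proc0 might thus look like the following sketch (illustrative only; d_nz and o_nz are passed as 0 because the d_nnz/o_nnz arrays are given and take precedence):
.vb
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      Mat      A;
      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve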
4206 4207 Level: intermediate 4208 4209 .keywords: matrix, aij, compressed row, sparse, parallel 4210 4211 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4212 MPIAIJ, MatCreateMPIAIJWithArrays() 4213 @*/ 4214 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4215 { 4216 PetscErrorCode ierr; 4217 PetscMPIInt size; 4218 4219 PetscFunctionBegin; 4220 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4221 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4222 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4223 if (size > 1) { 4224 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4225 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4226 } else { 4227 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4228 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4229 } 4230 PetscFunctionReturn(0); 4231 } 4232 4233 #undef __FUNCT__ 4234 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4235 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4236 { 4237 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4238 4239 PetscFunctionBegin; 4240 *Ad = a->A; 4241 *Ao = a->B; 4242 *colmap = a->garray; 4243 PetscFunctionReturn(0); 4244 } 4245 4246 #undef __FUNCT__ 4247 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4248 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4249 { 4250 PetscErrorCode ierr; 4251 PetscInt i; 4252 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4253 4254 PetscFunctionBegin; 4255 if (coloring->ctype == IS_COLORING_GLOBAL) { 4256 ISColoringValue *allcolors,*colors; 4257 ISColoring ocoloring; 4258 4259 /* set coloring for diagonal portion */ 4260 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4261 4262 /* set coloring for off-diagonal portion */ 4263 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4264 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4265 for (i=0; i<a->B->cmap->n; i++) { 4266 colors[i] = allcolors[a->garray[i]]; 4267 } 4268 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4269 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4270 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4271 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4272 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4273 ISColoringValue *colors; 4274 PetscInt *larray; 4275 ISColoring ocoloring; 4276 4277 /* set coloring for diagonal portion */ 4278 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4279 for (i=0; i<a->A->cmap->n; i++) { 4280 larray[i] = i + A->cmap->rstart; 4281 } 4282 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4283 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4284 for (i=0; i<a->A->cmap->n; i++) { 4285 colors[i] = coloring->colors[larray[i]]; 4286 } 4287 ierr = PetscFree(larray);CHKERRQ(ierr); 4288 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4289 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4290 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4291 4292 /* set coloring for off-diagonal portion */ 4293 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4294 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4295 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4296 for (i=0; i<a->B->cmap->n; i++) { 4297 colors[i] = coloring->colors[larray[i]]; 4298 } 4299 ierr = PetscFree(larray);CHKERRQ(ierr); 4300 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4301 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4302 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4303 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4304 PetscFunctionReturn(0); 4305 } 4306 4307 #undef __FUNCT__ 4308 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4309 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4310 { 4311 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4312 PetscErrorCode ierr; 4313 4314 PetscFunctionBegin; 4315 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4316 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4317 PetscFunctionReturn(0); 4318 } 4319 4320 #undef __FUNCT__ 4321 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4322 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4323 { 4324 PetscErrorCode ierr; 4325 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4326 PetscInt *indx; 4327 4328 PetscFunctionBegin; 4329 /* This routine will ONLY return MPIAIJ type matrix */ 4330 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4331 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4332 if (n == PETSC_DECIDE) { 4333 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4334 } 4335 /* Check sum(n) = N */ 4336 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4337 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4338 4339 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4340 rstart -= m; 4341 4342 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4343 for (i=0; i<m; i++) { 4344 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4345 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4346 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4347 } 4348 4349 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4350 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4351 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4352 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4353 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4354 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4355 PetscFunctionReturn(0); 4356 } 4357 4358 #undef __FUNCT__ 4359 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4360 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4361 { 4362 PetscErrorCode ierr; 4363 PetscInt m,N,i,rstart,nnz,Ii; 4364 PetscInt *indx; 4365 PetscScalar *values; 4366 4367 PetscFunctionBegin; 4368 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4369 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4370 for (i=0; i<m; i++) { 4371 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4372 Ii = i + rstart; 4373 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4374 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4375 } 4376 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4377 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4378 PetscFunctionReturn(0); 4379 } 4380 4381 #undef __FUNCT__ 4382 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4383 /*@ 4384 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4385 matrices from each processor 4386 4387 Collective on MPI_Comm 4388 4389 Input Parameters: 4390 + comm - the communicators the parallel matrix will live on 4391 . inmat - the input sequential matrices 4392 . n - number of local columns (or PETSC_DECIDE) 4393 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4394 4395 Output Parameter: 4396 . outmat - the parallel matrix generated 4397 4398 Level: advanced 4399 4400 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4401 4402 @*/ 4403 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4404 { 4405 PetscErrorCode ierr; 4406 PetscMPIInt size; 4407 4408 PetscFunctionBegin; 4409 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4410 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4411 if (size == 1) { 4412 if (scall == MAT_INITIAL_MATRIX) { 4413 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4414 } else { 4415 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4416 } 4417 } else { 4418 if (scall == MAT_INITIAL_MATRIX) { 4419 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4420 } 4421 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4422 } 4423 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4424 PetscFunctionReturn(0); 4425 } 4426 4427 #undef __FUNCT__ 4428 #define __FUNCT__ "MatFileSplit" 4429 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4430 { 4431 PetscErrorCode ierr; 4432 PetscMPIInt rank; 4433 PetscInt m,N,i,rstart,nnz; 4434 size_t len; 4435 const PetscInt *indx; 4436 PetscViewer out; 4437 char *name; 4438 Mat B; 4439 const PetscScalar *values; 4440 4441 PetscFunctionBegin; 4442 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4443 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4444 /* Should this be the type of the diagonal block of A? 
*/ 4445 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4446 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4447 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4448 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4449 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4450 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4451 for (i=0; i<m; i++) { 4452 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4453 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4454 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4455 } 4456 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4457 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4458 4459 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4460 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4461 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4462 sprintf(name,"%s.%d",outfile,rank); 4463 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4464 ierr = PetscFree(name);CHKERRQ(ierr); 4465 ierr = MatView(B,out);CHKERRQ(ierr); 4466 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4467 ierr = MatDestroy(&B);CHKERRQ(ierr); 4468 PetscFunctionReturn(0); 4469 } 4470 4471 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4472 #undef __FUNCT__ 4473 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4474 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4475 { 4476 PetscErrorCode ierr; 4477 Mat_Merge_SeqsToMPI *merge; 4478 PetscContainer container; 4479 4480 PetscFunctionBegin; 4481 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4482 if (container) { 4483 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4487 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4496 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4497 ierr = PetscFree(merge);CHKERRQ(ierr); 4498 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4499 } 4500 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4501 PetscFunctionReturn(0); 4502 } 4503 4504 #include <../src/mat/utils/freespace.h> 4505 #include <petscbt.h> 4506 4507 #undef __FUNCT__ 4508 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4509 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4510 { 4511 PetscErrorCode ierr; 4512 MPI_Comm comm; 4513 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4514 PetscMPIInt size,rank,taga,*len_s; 4515 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4516 PetscInt proc,m; 4517 PetscInt **buf_ri,**buf_rj; 4518 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4519 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4520 MPI_Request *s_waits,*r_waits; 4521 MPI_Status *status; 4522 MatScalar *aa=a->a; 4523 MatScalar **abuf_r,*ba_i; 4524 Mat_Merge_SeqsToMPI *merge; 4525 PetscContainer container; 4526 4527 PetscFunctionBegin; 4528 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4529 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4530 4531 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4532 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4533 4534 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4535 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4536 4537 bi = merge->bi; 4538 bj = merge->bj; 4539 buf_ri = merge->buf_ri; 4540 buf_rj = merge->buf_rj; 4541 4542 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4543 owners = merge->rowmap->range; 4544 len_s = merge->len_s; 4545 4546 /* send and recv matrix values */ 4547 /*-----------------------------*/ 4548 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4549 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4550 4551 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4552 for (proc=0,k=0; proc<size; proc++) { 4553 if (!len_s[proc]) continue; 4554 i = owners[proc]; 4555 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4556 k++; 4557 } 4558 4559 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4560 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4561 ierr = PetscFree(status);CHKERRQ(ierr); 4562 4563 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4564 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4565 4566 /* insert mat values of mpimat */ 4567 /*----------------------------*/ 4568 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4569 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4570 4571 for (k=0; k<merge->nrecv; k++) { 4572 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4573 nrows = *(buf_ri_k[k]); 4574 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4575 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4576 } 4577 4578 /* set values of ba */ 4579 m = merge->rowmap->n; 4580 for (i=0; i<m; i++) { 4581 arow = owners[rank] + i; 4582 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4583 bnzi = bi[i+1] - bi[i]; 4584 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4585 4586 /* add local non-zero vals of this proc's seqmat into ba */ 4587 anzi = ai[arow+1] - ai[arow]; 4588 aj = a->j + ai[arow]; 4589 aa = a->a + ai[arow]; 4590 nextaj = 0; 4591 for (j=0; nextaj<anzi; j++) { 4592 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4593 ba_i[j] += aa[nextaj++]; 4594 } 4595 } 4596 4597 /* add received vals into ba */ 4598 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4599 /* i-th row */ 4600 if (i == *nextrow[k]) { 4601 anzi = *(nextai[k]+1) - *nextai[k]; 4602 aj = buf_rj[k] + *(nextai[k]); 4603 aa = abuf_r[k] + *(nextai[k]); 4604 nextaj = 0; 4605 for (j=0; nextaj<anzi; j++) { 4606 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4607 ba_i[j] += aa[nextaj++]; 4608 } 4609 } 4610 nextrow[k]++; nextai[k]++; 4611 } 4612 } 4613 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4614 } 4615 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4616 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4617 4618 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4619 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4620 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4621 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4622 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4623 PetscFunctionReturn(0); 4624 } 4625 4626 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4627 4628 #undef __FUNCT__ 4629 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4630 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4631 { 4632 PetscErrorCode ierr; 4633 Mat B_mpi; 4634 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4635 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4636 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4637 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4638 PetscInt len,proc,*dnz,*onz,bs,cbs; 4639 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4640 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4641 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4642 MPI_Status *status; 4643 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4644 PetscBT lnkbt; 4645 Mat_Merge_SeqsToMPI *merge; 4646 PetscContainer container; 4647 4648 PetscFunctionBegin; 4649 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4650 4651 /* make sure it is a PETSc comm */ 4652 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4653 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4654 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4655 4656 ierr = PetscNew(&merge);CHKERRQ(ierr); 4657 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4658 4659 /* determine row ownership */ 4660 /*---------------------------------------------------------*/ 4661 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4662 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4663 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4664 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4665 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4666 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4667 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4668 4669 m = merge->rowmap->n; 4670 owners = merge->rowmap->range; 4671 4672 /* determine the number of messages to send, their lengths */ 4673 /*---------------------------------------------------------*/ 4674 len_s = merge->len_s; 4675 4676 len = 0; /* length of buf_si[] */ 4677 merge->nsend = 0; 4678 for (proc=0; proc<size; proc++) { 4679 len_si[proc] = 0; 4680 if (proc == rank) { 4681 len_s[proc] = 0; 4682 } else { 4683 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4684 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4685 } 4686 if (len_s[proc]) { 4687 merge->nsend++; 4688 nrows = 0; 4689 for (i=owners[proc]; i<owners[proc+1]; i++) { 4690 if (ai[i+1] > ai[i]) nrows++; 4691 } 4692 len_si[proc] = 2*(nrows+1); 4693 len += len_si[proc]; 4694 } 4695 } 4696 4697 /* determine the number and length of messages to receive for ij-structure */ 4698 /*-------------------------------------------------------------------------*/ 4699 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4700 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4701 4702 /* post the Irecv of j-structure */ 4703 /*-------------------------------*/ 4704 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4705 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4706 4707 /* post the Isend of j-structure */ 4708 /*--------------------------------*/ 4709 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4710 4711 for (proc=0, k=0; proc<size; proc++) { 4712 if (!len_s[proc]) continue; 4713 i = owners[proc]; 4714 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4715 k++; 4716 } 4717 4718 /* receives and sends of j-structure are complete */ 4719 /*------------------------------------------------*/ 4720 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4721 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4722 4723 /* send and recv i-structure */ 4724 /*---------------------------*/ 4725 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4726 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4727 4728 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4729 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4730 for (proc=0,k=0; proc<size; proc++) { 4731 if (!len_s[proc]) continue; 4732 /* form outgoing message for i-structure: 4733 buf_si[0]: nrows to be sent 4734 [1:nrows]: row index (global) 4735 [nrows+1:2*nrows+1]: i-structure index 4736 */ 4737 /*-------------------------------------------*/ 4738 nrows = len_si[proc]/2 - 1; 4739 buf_si_i = buf_si + nrows+1; 4740 buf_si[0] = nrows; 4741 buf_si_i[0] = 0; 4742 nrows = 0; 4743 for (i=owners[proc]; i<owners[proc+1]; i++) { 4744 anzi = ai[i+1] - ai[i]; 4745 if (anzi) { 4746 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4747 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4748 nrows++; 4749 } 4750 } 4751 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4752 k++; 4753 buf_si += len_si[proc]; 4754 } 4755 4756 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4757 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4758 4759 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4760 for (i=0; i<merge->nrecv; i++) { 4761 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4762 } 4763 4764 ierr = PetscFree(len_si);CHKERRQ(ierr); 4765 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4766 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4767 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4768 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4769 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4770 ierr = PetscFree(status);CHKERRQ(ierr); 4771 4772 /* compute a local seq matrix in each processor */ 4773 /*----------------------------------------------*/ 4774 /* allocate bi array and free space for accumulating nonzero column info */ 4775 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4776 bi[0] = 0; 4777 4778 /* create and initialize a linked list */ 4779 nlnk = N+1; 4780 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4781 4782 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4783 len = ai[owners[rank+1]] - ai[owners[rank]]; 4784 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4785 4786 current_space = free_space; 4787 4788 /* determine symbolic info for each local row */ 4789 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4790 4791 for (k=0; k<merge->nrecv; k++) { 4792 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4793 nrows = *buf_ri_k[k]; 4794 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4795 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4796 } 4797 4798 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4799 len = 0; 4800 for (i=0; i<m; i++) { 4801 bnzi = 0; 4802 /* add local non-zero cols of this proc's seqmat into lnk */ 4803 arow = owners[rank] + i; 4804 anzi = ai[arow+1] - ai[arow]; 4805 aj = a->j + ai[arow]; 4806 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4807 bnzi += nlnk; 4808 /* add received col data into lnk */ 4809 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4810 if (i == *nextrow[k]) { /* i-th row */ 4811 anzi = *(nextai[k]+1) - *nextai[k]; 4812 aj = buf_rj[k] + *nextai[k]; 4813 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4814 bnzi += nlnk; 4815 nextrow[k]++; nextai[k]++; 4816 } 4817 } 4818 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4819 4820 /* if free space is not available, make more free space */ 4821 if (current_space->local_remaining<bnzi) { 4822 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4823 nspacedouble++; 4824 } 4825 /* copy data into free space, then initialize lnk */ 4826 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4827 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4828 4829 current_space->array += bnzi; 4830 current_space->local_used += bnzi; 4831 current_space->local_remaining -= bnzi; 4832 4833 bi[i+1] = bi[i] + bnzi; 4834 } 4835 4836 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4837 4838 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4839 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4840 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4841 4842 /* create symbolic parallel matrix B_mpi */ 4843 /*---------------------------------------*/ 4844 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4845 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4846 if (n==PETSC_DECIDE) { 4847 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4848 } else { 4849 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4850 } 4851 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4852 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4853 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4854 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4855 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4856 4857 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4858 B_mpi->assembled = PETSC_FALSE; 4859 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4860 merge->bi = bi; 4861 merge->bj = bj; 4862 merge->buf_ri = buf_ri; 4863 merge->buf_rj = buf_rj; 4864 merge->coi = NULL; 4865 merge->coj = NULL; 4866 merge->owners_co = NULL; 4867 4868 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4869 4870 /* attach the supporting struct to B_mpi for reuse */ 4871 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4872 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4873 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4874 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4875 *mpimat = B_mpi; 4876 4877 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4878 PetscFunctionReturn(0); 4879 } 4880 4881 #undef __FUNCT__ 4882 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4883 /*@C 4884 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4885 matrices from each processor 4886 4887 Collective on MPI_Comm 4888 4889 Input Parameters: 4890 + comm - the communicators the parallel matrix will live on 4891 . seqmat - the input sequential matrices 4892 . m - number of local rows (or PETSC_DECIDE) 4893 . n - number of local columns (or PETSC_DECIDE) 4894 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4895 4896 Output Parameter: 4897 . mpimat - the parallel matrix generated 4898 4899 Level: advanced 4900 4901 Notes: 4902 The dimensions of the sequential matrix in each processor MUST be the same. 4903 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4904 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4905 @*/ 4906 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4907 { 4908 PetscErrorCode ierr; 4909 PetscMPIInt size; 4910 4911 PetscFunctionBegin; 4912 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4913 if (size == 1) { 4914 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4915 if (scall == MAT_INITIAL_MATRIX) { 4916 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4917 } else { 4918 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4919 } 4920 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4921 PetscFunctionReturn(0); 4922 } 4923 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4924 if (scall == MAT_INITIAL_MATRIX) { 4925 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4926 } 4927 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4928 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4929 PetscFunctionReturn(0); 4930 } 4931 4932 #undef __FUNCT__ 4933 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4934 /*@ 4935 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4936 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4937 with MatGetSize() 4938 4939 Not Collective 4940 4941 Input Parameters: 4942 + A - the matrix 4943 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4944 4945 Output Parameter: 4946 . 
A_loc - the local sequential matrix generated 4947 4948 Level: developer 4949 4950 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4951 4952 @*/ 4953 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4954 { 4955 PetscErrorCode ierr; 4956 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4957 Mat_SeqAIJ *mat,*a,*b; 4958 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4959 MatScalar *aa,*ba,*cam; 4960 PetscScalar *ca; 4961 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4962 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4963 PetscBool match; 4964 4965 PetscFunctionBegin; 4966 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4967 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4968 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4969 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4970 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4971 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4972 aa = a->a; ba = b->a; 4973 if (scall == MAT_INITIAL_MATRIX) { 4974 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4975 ci[0] = 0; 4976 for (i=0; i<am; i++) { 4977 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4978 } 4979 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4980 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4981 k = 0; 4982 for (i=0; i<am; i++) { 4983 ncols_o = bi[i+1] - bi[i]; 4984 ncols_d = ai[i+1] - ai[i]; 4985 /* off-diagonal portion of A */ 4986 for (jo=0; jo<ncols_o; jo++) { 4987 col = cmap[*bj]; 4988 if (col >= cstart) break; 4989 cj[k] = col; bj++; 4990 ca[k++] = *ba++; 4991 } 4992 /* diagonal portion of A */ 4993 for (j=0; j<ncols_d; j++) { 4994 cj[k] = cstart + *aj++; 4995 ca[k++] = *aa++; 4996 } 4997 /* off-diagonal portion of A */ 4998 for (j=jo; j<ncols_o; j++) { 4999 cj[k] = cmap[*bj++]; 5000 ca[k++] = *ba++; 5001 } 5002 } 5003 /* put together the new matrix */ 5004 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5005 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5006 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5007 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5008 mat->free_a = PETSC_TRUE; 5009 mat->free_ij = PETSC_TRUE; 5010 mat->nonew = 0; 5011 } else if (scall == MAT_REUSE_MATRIX) { 5012 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5013 ci = mat->i; cj = mat->j; cam = mat->a; 5014 for (i=0; i<am; i++) { 5015 /* off-diagonal portion of A */ 5016 ncols_o = bi[i+1] - bi[i]; 5017 for (jo=0; jo<ncols_o; jo++) { 5018 col = cmap[*bj]; 5019 if (col >= cstart) break; 5020 *cam++ = *ba++; bj++; 5021 } 5022 /* diagonal portion of A */ 5023 ncols_d = ai[i+1] - ai[i]; 5024 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5025 /* off-diagonal portion of A */ 5026 for (j=jo; j<ncols_o; j++) { 5027 *cam++ = *ba++; bj++; 5028 } 5029 } 5030 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5031 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5032 PetscFunctionReturn(0); 5033 } 5034 5035 #undef __FUNCT__ 5036 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5037 /*@C 5038 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5039 5040 Not Collective 5041 5042 Input Parameters: 5043 + A - the matrix 5044 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5045 - row, col - index sets of rows and columns to extract (or NULL) 5046 5047 Output Parameter: 5048 . 
A_loc - the local sequential matrix generated 5049 5050 Level: developer 5051 5052 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5053 5054 @*/ 5055 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5056 { 5057 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5058 PetscErrorCode ierr; 5059 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5060 IS isrowa,iscola; 5061 Mat *aloc; 5062 PetscBool match; 5063 5064 PetscFunctionBegin; 5065 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5066 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5067 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5068 if (!row) { 5069 start = A->rmap->rstart; end = A->rmap->rend; 5070 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5071 } else { 5072 isrowa = *row; 5073 } 5074 if (!col) { 5075 start = A->cmap->rstart; 5076 cmap = a->garray; 5077 nzA = a->A->cmap->n; 5078 nzB = a->B->cmap->n; 5079 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5080 ncols = 0; 5081 for (i=0; i<nzB; i++) { 5082 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5083 else break; 5084 } 5085 imark = i; 5086 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5087 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5088 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5089 } else { 5090 iscola = *col; 5091 } 5092 if (scall != MAT_INITIAL_MATRIX) { 5093 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5094 aloc[0] = *A_loc; 5095 } 5096 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5097 *A_loc = aloc[0]; 5098 ierr = PetscFree(aloc);CHKERRQ(ierr); 5099 if (!row) { 5100 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5101 } 5102 if (!col) { 5103 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5104 } 5105 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5106 PetscFunctionReturn(0); 5107 } 5108 5109 #undef __FUNCT__ 5110 #define __FUNCT__ "MatGetBrowsOfAcols" 5111 /*@C 5112 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5113 5114 Collective on Mat 5115 5116 Input Parameters: 5117 + A,B - the matrices in mpiaij format 5118 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5119 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5120 5121 Output Parameter: 5122 + rowb, colb - index sets of rows and columns of B to extract 5123 - B_seq - the sequential matrix generated 5124 5125 Level: developer 5126 5127 @*/ 5128 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5129 { 5130 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5131 PetscErrorCode ierr; 5132 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5133 IS isrowb,iscolb; 5134 Mat *bseq=NULL; 5135 5136 PetscFunctionBegin; 5137 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5138 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5139 } 5140 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5141 5142 if (scall == MAT_INITIAL_MATRIX) { 5143 start = A->cmap->rstart; 5144 cmap = a->garray; 5145 nzA = a->A->cmap->n; 5146 nzB = a->B->cmap->n; 5147 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5148 ncols = 0; 5149 for (i=0; i<nzB; i++) { /* row < local row index */ 5150 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5151 else break; 5152 } 5153 imark = i; 5154 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5155 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5156 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5157 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5158 } else { 5159 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5160 isrowb = *rowb; iscolb = *colb; 5161 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5162 bseq[0] = *B_seq; 5163 } 5164 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5165 *B_seq = bseq[0]; 5166 ierr = PetscFree(bseq);CHKERRQ(ierr); 5167 if (!rowb) { 5168 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5169 } else { 5170 *rowb = isrowb; 5171 } 5172 if (!colb) { 5173 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5174 } else { 5175 *colb = iscolb; 5176 } 5177 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5178 PetscFunctionReturn(0); 5179 } 5180 5181 #undef __FUNCT__ 5182 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5183 /* 5184 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5185 of the OFF-DIAGONAL portion of local A 5186 5187 Collective on Mat 5188 5189 Input Parameters: 5190 + A,B - the matrices in mpiaij format 5191 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5192 5193 Output Parameter: 5194 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5195 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5196 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5197 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5198 5199 Level: developer 5200 5201 */ 5202 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5203 { 5204 VecScatter_MPI_General *gen_to,*gen_from; 5205 PetscErrorCode ierr; 5206 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5207 Mat_SeqAIJ *b_oth; 5208 VecScatter ctx =a->Mvctx; 5209 MPI_Comm comm; 5210 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5211 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5212 PetscScalar *rvalues,*svalues; 5213 MatScalar *b_otha,*bufa,*bufA; 5214 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5215 MPI_Request *rwaits = NULL,*swaits = NULL; 5216 MPI_Status *sstatus,rstatus; 5217 PetscMPIInt jj; 5218 PetscInt *cols,sbs,rbs; 5219 PetscScalar *vals; 5220 5221 PetscFunctionBegin; 5222 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5223 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5224 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5225 } 5226 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5227 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5228 5229 gen_to = (VecScatter_MPI_General*)ctx->todata; 5230 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5231 rvalues = gen_from->values; /* holds the length of receiving row */ 5232 svalues = gen_to->values; /* holds the length of sending row */ 5233 nrecvs = gen_from->n; 5234 nsends = gen_to->n; 5235 5236 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5237 srow = gen_to->indices; /* local row index to be sent */ 5238 sstarts = gen_to->starts; 5239 sprocs = gen_to->procs; 5240 sstatus = gen_to->sstatus; 5241 sbs = gen_to->bs; 5242 rstarts = gen_from->starts; 5243 rprocs = gen_from->procs; 5244 rbs = gen_from->bs; 5245 5246 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5247 if (scall == MAT_INITIAL_MATRIX) { 5248 /* i-array */ 5249 /*---------*/ 5250 /* post receives */ 5251 for (i=0; i<nrecvs; i++) { 5252 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5253 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5254 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5255 } 5256 5257 /* pack the outgoing message */ 5258 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5259 5260 sstartsj[0] = 0; 5261 rstartsj[0] = 0; 5262 len = 0; /* total length of j or a array to be sent */ 5263 k = 0; 5264 for (i=0; i<nsends; i++) { 5265 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5266 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5267 for (j=0; j<nrows; j++) { 5268 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5269 for (l=0; l<sbs; l++) { 5270 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5271 5272 rowlen[j*sbs+l] = ncols; 5273 5274 len += ncols; 5275 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5276 } 5277 k++; 5278 } 5279 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5280 5281 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5282 } 5283 /* recvs and 
sends of i-array are completed */ 5284 i = nrecvs; 5285 while (i--) { 5286 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5287 } 5288 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5289 5290 /* allocate buffers for sending j and a arrays */ 5291 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5292 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5293 5294 /* create i-array of B_oth */ 5295 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5296 5297 b_othi[0] = 0; 5298 len = 0; /* total length of j or a array to be received */ 5299 k = 0; 5300 for (i=0; i<nrecvs; i++) { 5301 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5302 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5303 for (j=0; j<nrows; j++) { 5304 b_othi[k+1] = b_othi[k] + rowlen[j]; 5305 len += rowlen[j]; k++; 5306 } 5307 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5308 } 5309 5310 /* allocate space for j and a arrrays of B_oth */ 5311 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5312 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5313 5314 /* j-array */ 5315 /*---------*/ 5316 /* post receives of j-array */ 5317 for (i=0; i<nrecvs; i++) { 5318 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5319 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5320 } 5321 5322 /* pack the outgoing message j-array */ 5323 k = 0; 5324 for (i=0; i<nsends; i++) { 5325 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5326 bufJ = bufj+sstartsj[i]; 5327 for (j=0; j<nrows; j++) { 5328 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5329 for (ll=0; ll<sbs; ll++) { 5330 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5331 for (l=0; l<ncols; l++) { 5332 *bufJ++ = cols[l]; 5333 } 5334 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5335 } 5336 } 5337 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5338 } 5339 5340 /* recvs and sends of j-array are completed */ 5341 i = nrecvs; 5342 while (i--) { 5343 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5344 } 5345 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5346 } else if (scall == MAT_REUSE_MATRIX) { 5347 sstartsj = *startsj_s; 5348 rstartsj = *startsj_r; 5349 bufa = *bufa_ptr; 5350 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5351 b_otha = b_oth->a; 5352 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5353 5354 /* a-array */ 5355 /*---------*/ 5356 /* post receives of a-array */ 5357 for (i=0; i<nrecvs; i++) { 5358 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5359 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5360 } 5361 5362 /* pack the outgoing message a-array */ 5363 k = 0; 5364 for (i=0; i<nsends; i++) { 5365 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5366 bufA = bufa+sstartsj[i]; 5367 for (j=0; j<nrows; j++) { 5368 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5369 for (ll=0; ll<sbs; ll++) { 5370 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5371 for (l=0; l<ncols; l++) { 5372 *bufA++ = vals[l]; 5373 } 5374 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5375 } 5376 } 5377 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5378 } 5379 /* recvs and sends of a-array are completed */ 5380 i = nrecvs; 5381 while (i--) { 5382 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5383 } 5384 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5385 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5386 5387 if (scall == MAT_INITIAL_MATRIX) { 5388 /* put together the new matrix */ 5389 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5390 5391 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5392 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5393 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5394 b_oth->free_a = PETSC_TRUE; 5395 b_oth->free_ij = PETSC_TRUE; 5396 b_oth->nonew = 0; 5397 5398 ierr = PetscFree(bufj);CHKERRQ(ierr); 5399 if (!startsj_s || !bufa_ptr) { 5400 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5401 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5402 } else { 5403 *startsj_s = sstartsj; 5404 *startsj_r = rstartsj; 5405 *bufa_ptr = bufa; 5406 } 5407 } 5408 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5409 PetscFunctionReturn(0); 5410 } 5411 5412 #undef __FUNCT__ 5413 #define __FUNCT__ "MatGetCommunicationStructs" 5414 /*@C 5415 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5416 5417 Not Collective 5418 5419 Input Parameters: 5420 . A - The matrix in mpiaij format 5421 5422 Output Parameter: 5423 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5424 . colmap - A map from global column index to local index into lvec 5425 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5426 5427 Level: developer 5428 5429 @*/ 5430 #if defined(PETSC_USE_CTABLE) 5431 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5432 #else 5433 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5434 #endif 5435 { 5436 Mat_MPIAIJ *a; 5437 5438 PetscFunctionBegin; 5439 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5440 PetscValidPointer(lvec, 2); 5441 PetscValidPointer(colmap, 3); 5442 PetscValidPointer(multScatter, 4); 5443 a = (Mat_MPIAIJ*) A->data; 5444 if (lvec) *lvec = a->lvec; 5445 if (colmap) *colmap = a->colmap; 5446 if (multScatter) *multScatter = a->Mvctx; 5447 PetscFunctionReturn(0); 5448 } 5449 5450 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5451 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5452 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5453 5454 #undef __FUNCT__ 5455 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5456 /* 5457 Computes (B'*A')' since computing B*A directly is untenable 5458 5459 n p p 5460 ( ) ( ) ( ) 5461 m ( A ) * n ( B ) = m ( C ) 5462 ( ) ( ) ( ) 5463 5464 */ 5465 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5466 { 5467 PetscErrorCode ierr; 5468 Mat At,Bt,Ct; 5469 5470 PetscFunctionBegin; 5471 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5472 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5473 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5474 ierr = MatDestroy(&At);CHKERRQ(ierr); 
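/* Ct = Bt*At = (A*B)^T; transposing Ct into the preallocated C below recovers C = A*B, so the temporary transposes are no longer needed */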
5475 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5476 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5477 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5478 PetscFunctionReturn(0); 5479 } 5480 5481 #undef __FUNCT__ 5482 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5483 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5484 { 5485 PetscErrorCode ierr; 5486 PetscInt m=A->rmap->n,n=B->cmap->n; 5487 Mat Cmat; 5488 5489 PetscFunctionBegin; 5490 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5491 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5492 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5493 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5494 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5495 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5496 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5497 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5498 5499 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5500 5501 *C = Cmat; 5502 PetscFunctionReturn(0); 5503 } 5504 5505 /* ----------------------------------------------------------------*/ 5506 #undef __FUNCT__ 5507 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5508 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5509 { 5510 PetscErrorCode ierr; 5511 5512 PetscFunctionBegin; 5513 if (scall == MAT_INITIAL_MATRIX) { 5514 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5515 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5516 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5517 } 5518 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5519 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5520 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5521 PetscFunctionReturn(0); 5522 } 5523 5524 #if defined(PETSC_HAVE_MUMPS) 5525 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5526 #endif 5527 #if defined(PETSC_HAVE_PASTIX) 5528 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5529 #endif 5530 #if defined(PETSC_HAVE_SUPERLU_DIST) 5531 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5532 #endif 5533 #if defined(PETSC_HAVE_CLIQUE) 5534 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5535 #endif 5536 5537 /*MC 5538 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5539 5540 Options Database Keys: 5541 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5542 5543 Level: beginner 5544 5545 .seealso: MatCreateAIJ() 5546 M*/ 5547 5548 #undef __FUNCT__ 5549 #define __FUNCT__ "MatCreate_MPIAIJ" 5550 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5551 { 5552 Mat_MPIAIJ *b; 5553 PetscErrorCode ierr; 5554 PetscMPIInt size; 5555 5556 PetscFunctionBegin; 5557 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5558 5559 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5560 B->data = (void*)b; 5561 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5562 B->assembled = PETSC_FALSE; 5563 B->insertmode = NOT_SET_VALUES; 5564 b->size = size; 5565 5566 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5567 5568 /* build cache for off array entries formed */ 5569 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5570 5571 b->donotstash = PETSC_FALSE; 5572 b->colmap = 0; 5573 b->garray = 0; 5574 b->roworiented = PETSC_TRUE; 5575 5576 /* stuff used for matrix vector multiply */ 5577 b->lvec = NULL; 5578 b->Mvctx = NULL; 5579 5580 /* stuff for MatGetRow() */ 5581 b->rowindices = 0; 5582 b->rowvalues = 0; 5583 b->getrowactive = PETSC_FALSE; 5584 5585 /* flexible pointer used in CUSP/CUSPARSE classes */ 5586 b->spptr = NULL; 5587 5588 #if defined(PETSC_HAVE_MUMPS) 5589 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5590 #endif 5591 #if defined(PETSC_HAVE_PASTIX) 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5593 #endif 5594 #if defined(PETSC_HAVE_SUPERLU_DIST) 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5596 #endif 5597 #if defined(PETSC_HAVE_CLIQUE) 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5599 #endif 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5611 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5614 PetscFunctionReturn(0); 5615 } 5616 5617 #undef __FUNCT__ 5618 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5619 /*@ 5620 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5621 and "off-diagonal" part of the matrix in CSR format. 5622 5623 Collective on MPI_Comm 5624 5625 Input Parameters: 5626 + comm - MPI communicator 5627 . m - number of local rows (Cannot be PETSC_DECIDE) 5628 . n - This value should be the same as the local size used in creating the 5629 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5630 calculated if N is given) For square matrices n is almost always m. 5631 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5632 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5633 . i - row indices for "diagonal" portion of matrix 5634 . j - column indices 5635 . a - matrix values 5636 . oi - row indices for "off-diagonal" portion of matrix 5637 . oj - column indices 5638 - oa - matrix values 5639 5640 Output Parameter: 5641 . mat - the matrix 5642 5643 Level: advanced 5644 5645 Notes: 5646 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5647 must free the arrays once the matrix has been destroyed and not before. 5648 5649 The i and j indices are 0 based 5650 5651 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5652 5653 This sets local rows and cannot be used to set off-processor values. 5654 5655 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5656 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5657 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5658 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5659 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5660 communication if it is known that only local entries will be set. 
5661 5662 .keywords: matrix, aij, compressed row, sparse, parallel 5663 5664 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5665 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5666 @*/ 5667 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5668 { 5669 PetscErrorCode ierr; 5670 Mat_MPIAIJ *maij; 5671 5672 PetscFunctionBegin; 5673 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5674 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5675 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5676 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5677 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5678 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5679 maij = (Mat_MPIAIJ*) (*mat)->data; 5680 5681 (*mat)->preallocated = PETSC_TRUE; 5682 5683 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5684 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5685 5686 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5687 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5688 5689 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5690 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5691 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5692 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5693 5694 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5695 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5696 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5697 PetscFunctionReturn(0); 5698 } 5699 5700 /* 5701 Special version for direct calls from Fortran 5702 */ 5703 #include <petsc-private/fortranimpl.h> 5704 5705 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5706 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5707 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5708 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5709 #endif 5710 5711 /* Change these macros so can be used in void function */ 5712 #undef CHKERRQ 5713 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5714 #undef SETERRQ2 5715 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5716 #undef SETERRQ3 5717 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5718 #undef SETERRQ 5719 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5720 5721 #undef __FUNCT__ 5722 #define __FUNCT__ "matsetvaluesmpiaij_" 5723 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5724 { 5725 Mat mat = *mmat; 5726 PetscInt m = *mm, n = *mn; 5727 InsertMode addv = *maddv; 5728 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5729 PetscScalar value; 5730 PetscErrorCode ierr; 5731 5732 MatCheckPreallocated(mat,1); 5733 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5734 5735 #if defined(PETSC_USE_DEBUG) 5736 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5737 #endif 5738 { 5739 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5740 
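/* The insertion loop below duplicates the logic of MatSetValues_MPIAIJ(): entries in locally owned rows go directly into the diagonal (A) or off-diagonal (B) block, while entries destined for rows owned by other processes are stashed and exchanged during assembly */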
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5741 PetscBool roworiented = aij->roworiented; 5742 5743 /* Some Variables required in the macro */ 5744 Mat A = aij->A; 5745 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5746 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5747 MatScalar *aa = a->a; 5748 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5749 Mat B = aij->B; 5750 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5751 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5752 MatScalar *ba = b->a; 5753 5754 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5755 PetscInt nonew = a->nonew; 5756 MatScalar *ap1,*ap2; 5757 5758 PetscFunctionBegin; 5759 for (i=0; i<m; i++) { 5760 if (im[i] < 0) continue; 5761 #if defined(PETSC_USE_DEBUG) 5762 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5763 #endif 5764 if (im[i] >= rstart && im[i] < rend) { 5765 row = im[i] - rstart; 5766 lastcol1 = -1; 5767 rp1 = aj + ai[row]; 5768 ap1 = aa + ai[row]; 5769 rmax1 = aimax[row]; 5770 nrow1 = ailen[row]; 5771 low1 = 0; 5772 high1 = nrow1; 5773 lastcol2 = -1; 5774 rp2 = bj + bi[row]; 5775 ap2 = ba + bi[row]; 5776 rmax2 = bimax[row]; 5777 nrow2 = bilen[row]; 5778 low2 = 0; 5779 high2 = nrow2; 5780 5781 for (j=0; j<n; j++) { 5782 if (roworiented) value = v[i*n+j]; 5783 else value = v[i+j*m]; 5784 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5785 if (in[j] >= cstart && in[j] < cend) { 5786 col = in[j] - cstart; 5787 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5788 } else if (in[j] < 0) continue; 5789 #if defined(PETSC_USE_DEBUG) 5790 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5791 #endif 5792 else { 5793 if (mat->was_assembled) { 5794 if (!aij->colmap) { 5795 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5796 } 5797 #if defined(PETSC_USE_CTABLE) 5798 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5799 col--; 5800 #else 5801 col = aij->colmap[in[j]] - 1; 5802 #endif 5803 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5804 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5805 col = in[j]; 5806 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5807 B = aij->B; 5808 b = (Mat_SeqAIJ*)B->data; 5809 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5810 rp2 = bj + bi[row]; 5811 ap2 = ba + bi[row]; 5812 rmax2 = bimax[row]; 5813 nrow2 = bilen[row]; 5814 low2 = 0; 5815 high2 = nrow2; 5816 bm = aij->B->rmap->n; 5817 ba = b->a; 5818 } 5819 } else col = in[j]; 5820 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5821 } 5822 } 5823 } else if (!aij->donotstash) { 5824 if (roworiented) { 5825 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5826 } else { 5827 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5828 } 5829 } 5830 } 5831 } 5832 PetscFunctionReturnVoid(); 5833 } 5834 5835