1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
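   A rough calling sketch (the names gA, dA, and nlocal below are illustrative,
   not part of this interface):

      Mat      gA;       sequential AIJ matrix, fully populated on rank 0
      Mat      dA;       distributed result
      PetscInt nlocal;   number of rows this process is to own

      ierr = MatDistribute_MPIAIJ(comm,gA,nlocal,MAT_INITIAL_MATRIX,&dA);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX and an already distributed *inmat, only the numerical
   values are moved over from process 0.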
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 if (!rank) { 206 bses[0] = gmat->rmap->bs; 207 bses[1] = gmat->cmap->bs; 208 } 209 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 210 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 211 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 212 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 213 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 214 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 215 216 rowners[0] = 0; 217 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 218 rstart = rowners[rank]; 219 rend = rowners[rank+1]; 220 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 221 if (!rank) { 222 gmata = (Mat_SeqAIJ*) gmat->data; 223 /* send row lengths to all processors */ 224 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 225 for (i=1; i<size; i++) { 226 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 227 } 228 /* determine number diagonal and off-diagonal counts */ 229 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 230 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 231 jj = 0; 232 for (i=0; i<m; i++) { 233 for (j=0; j<dlens[i]; j++) { 234 if (gmata->j[jj] < rstart) ld[i]++; 235 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 236 jj++; 237 } 238 } 239 /* send column indices to other processes */ 240 for (i=1; i<size; i++) { 241 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 242 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 } 245 246 /* send numerical values to other processes */ 247 for (i=1; i<size; i++) { 248 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 249 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 250 } 251 gmataa = gmata->a; 252 gmataj = gmata->j; 253 254 } else { 255 /* receive row lengths */ 256 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 257 /* receive column indices */ 258 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 260 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 /* determine number diagonal and off-diagonal counts */ 262 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 264 jj = 0; 265 for (i=0; i<m; i++) { 266 for (j=0; j<dlens[i]; 
j++) { 267 if (gmataj[jj] < rstart) ld[i]++; 268 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 269 jj++; 270 } 271 } 272 /* receive numerical values */ 273 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 274 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 275 } 276 /* set preallocation */ 277 for (i=0; i<m; i++) { 278 dlens[i] -= olens[i]; 279 } 280 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 281 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 282 283 for (i=0; i<m; i++) { 284 dlens[i] += olens[i]; 285 } 286 cnt = 0; 287 for (i=0; i<m; i++) { 288 row = rstart + i; 289 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 290 cnt += dlens[i]; 291 } 292 if (rank) { 293 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 294 } 295 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 296 ierr = PetscFree(rowners);CHKERRQ(ierr); 297 298 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 299 300 *inmat = mat; 301 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 302 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 303 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 304 mat = *inmat; 305 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 306 if (!rank) { 307 /* send numerical values to other processes */ 308 gmata = (Mat_SeqAIJ*) gmat->data; 309 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 310 gmataa = gmata->a; 311 for (i=1; i<size; i++) { 312 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 313 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 314 } 315 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 316 } else { 317 /* receive numerical values from process 0*/ 318 nz = Ad->nz + Ao->nz; 319 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 320 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 321 } 322 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 323 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 324 ad = Ad->a; 325 ao = Ao->a; 326 if (mat->rmap->n) { 327 i = 0; 328 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 329 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 330 } 331 for (i=1; i<mat->rmap->n; i++) { 332 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 333 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 334 } 335 i--; 336 if (mat->rmap->n) { 337 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 338 } 339 if (rank) { 340 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 341 } 342 } 343 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 /* 349 Local utility routine that creates a mapping from the global column 350 number to the local number in the off-diagonal part of the local 351 storage of the matrix. 
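   For example, if this process has off-diagonal entries only in global columns
   garray[] = {3,8,20}, then global column 8 maps to local column 1 of B; the value
   stored in colmap is 2 (local index plus one), so that a lookup returning zero
   means "this global column does not appear in the off-diagonal part".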
When PETSC_USE_CTABLE is used this is scalable at 352 a slightly higher hash table cost; without it it is not scalable (each processor 353 has an order N integer array but is fast to acess. 354 */ 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 358 { 359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 360 PetscErrorCode ierr; 361 PetscInt n = aij->B->cmap->n,i; 362 363 PetscFunctionBegin; 364 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 365 #if defined(PETSC_USE_CTABLE) 366 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 367 for (i=0; i<n; i++) { 368 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 369 } 370 #else 371 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 372 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 373 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 374 #endif 375 PetscFunctionReturn(0); 376 } 377 378 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 379 { \ 380 if (col <= lastcol1) low1 = 0; \ 381 else high1 = nrow1; \ 382 lastcol1 = col;\ 383 while (high1-low1 > 5) { \ 384 t = (low1+high1)/2; \ 385 if (rp1[t] > col) high1 = t; \ 386 else low1 = t; \ 387 } \ 388 for (_i=low1; _i<high1; _i++) { \ 389 if (rp1[_i] > col) break; \ 390 if (rp1[_i] == col) { \ 391 if (addv == ADD_VALUES) ap1[_i] += value; \ 392 else ap1[_i] = value; \ 393 goto a_noinsert; \ 394 } \ 395 } \ 396 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 397 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 398 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 399 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 400 N = nrow1++ - 1; a->nz++; high1++; \ 401 /* shift up all the later entries in this row */ \ 402 for (ii=N; ii>=_i; ii--) { \ 403 rp1[ii+1] = rp1[ii]; \ 404 ap1[ii+1] = ap1[ii]; \ 405 } \ 406 rp1[_i] = col; \ 407 ap1[_i] = value; \ 408 a_noinsert: ; \ 409 ailen[row] = nrow1; \ 410 } 411 412 413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 414 { \ 415 if (col <= lastcol2) low2 = 0; \ 416 else high2 = nrow2; \ 417 lastcol2 = col; \ 418 while (high2-low2 > 5) { \ 419 t = (low2+high2)/2; \ 420 if (rp2[t] > col) high2 = t; \ 421 else low2 = t; \ 422 } \ 423 for (_i=low2; _i<high2; _i++) { \ 424 if (rp2[_i] > col) break; \ 425 if (rp2[_i] == col) { \ 426 if (addv == ADD_VALUES) ap2[_i] += value; \ 427 else ap2[_i] = value; \ 428 goto b_noinsert; \ 429 } \ 430 } \ 431 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 432 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 433 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 434 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 435 N = nrow2++ - 1; b->nz++; high2++; \ 436 /* shift up all the later entries in this row */ \ 437 for (ii=N; ii>=_i; ii--) { \ 438 rp2[ii+1] = rp2[ii]; \ 439 ap2[ii+1] = ap2[ii]; \ 440 } \ 441 rp2[_i] = col; \ 442 ap2[_i] = value; \ 443 b_noinsert: ; \ 444 bilen[row] = nrow2; \ 445 } 446 447 #undef __FUNCT__ 448 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 449 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat 
A,PetscInt row,const PetscScalar v[]) 450 { 451 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 452 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 453 PetscErrorCode ierr; 454 PetscInt l,*garray = mat->garray,diag; 455 456 PetscFunctionBegin; 457 /* code only works for square matrices A */ 458 459 /* find size of row to the left of the diagonal part */ 460 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 461 row = row - diag; 462 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 463 if (garray[b->j[b->i[row]+l]] > diag) break; 464 } 465 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 466 467 /* diagonal part */ 468 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 469 470 /* right of diagonal part */ 471 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatSetValues_MPIAIJ" 477 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 478 { 479 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 480 PetscScalar value; 481 PetscErrorCode ierr; 482 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 483 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 484 PetscBool roworiented = aij->roworiented; 485 486 /* Some Variables required in the macro */ 487 Mat A = aij->A; 488 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 489 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 490 MatScalar *aa = a->a; 491 PetscBool ignorezeroentries = a->ignorezeroentries; 492 Mat B = aij->B; 493 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 494 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 495 MatScalar *ba = b->a; 496 497 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 498 PetscInt nonew; 499 MatScalar *ap1,*ap2; 500 501 PetscFunctionBegin; 502 if (v) PetscValidScalarPointer(v,6); 503 for (i=0; i<m; i++) { 504 if (im[i] < 0) continue; 505 #if defined(PETSC_USE_DEBUG) 506 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 507 #endif 508 if (im[i] >= rstart && im[i] < rend) { 509 row = im[i] - rstart; 510 lastcol1 = -1; 511 rp1 = aj + ai[row]; 512 ap1 = aa + ai[row]; 513 rmax1 = aimax[row]; 514 nrow1 = ailen[row]; 515 low1 = 0; 516 high1 = nrow1; 517 lastcol2 = -1; 518 rp2 = bj + bi[row]; 519 ap2 = ba + bi[row]; 520 rmax2 = bimax[row]; 521 nrow2 = bilen[row]; 522 low2 = 0; 523 high2 = nrow2; 524 525 for (j=0; j<n; j++) { 526 if (v) { 527 if (roworiented) value = v[i*n+j]; 528 else value = v[i+j*m]; 529 } else value = 0.0; 530 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 531 if (in[j] >= cstart && in[j] < cend) { 532 col = in[j] - cstart; 533 nonew = a->nonew; 534 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 535 } else if (in[j] < 0) continue; 536 #if defined(PETSC_USE_DEBUG) 537 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 538 #endif 539 else { 540 if (mat->was_assembled) { 541 if (!aij->colmap) { 542 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 543 } 544 #if defined(PETSC_USE_CTABLE) 545 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 546 col--; 547 #else 548 col = aij->colmap[in[j]] - 1; 549 #endif 550 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 551 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 552 col = in[j]; 553 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 554 B = aij->B; 555 b = (Mat_SeqAIJ*)B->data; 556 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 557 rp2 = bj + bi[row]; 558 ap2 = ba + bi[row]; 559 rmax2 = bimax[row]; 560 nrow2 = bilen[row]; 561 low2 = 0; 562 high2 = nrow2; 563 bm = aij->B->rmap->n; 564 ba = b->a; 565 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 566 } else col = in[j]; 567 nonew = b->nonew; 568 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 569 } 570 } 571 } else { 572 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 573 if (!aij->donotstash) { 574 mat->assembled = PETSC_FALSE; 575 if (roworiented) { 576 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 577 } else { 578 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 579 } 580 } 581 } 582 } 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatGetValues_MPIAIJ" 588 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 589 { 590 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 591 PetscErrorCode ierr; 592 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 593 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 594 595 PetscFunctionBegin; 596 for (i=0; i<m; i++) { 597 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 598 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 599 if (idxm[i] >= rstart && idxm[i] < rend) { 600 row = idxm[i] - rstart; 601 for (j=0; j<n; j++) { 602 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 603 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 604 if (idxn[j] >= cstart && idxn[j] < cend) { 605 col = idxn[j] - cstart; 606 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 607 } else { 608 if (!aij->colmap) { 609 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 610 } 611 #if defined(PETSC_USE_CTABLE) 612 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 613 col--; 614 #else 615 col = aij->colmap[idxn[j]] - 1; 616 #endif 617 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 618 else { 619 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 620 } 621 } 622 } 623 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 624 } 625 PetscFunctionReturn(0); 626 } 627 628 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 632 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 633 { 634 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 635 PetscErrorCode ierr; 636 PetscInt nstash,reallocs; 637 InsertMode addv; 638 639 PetscFunctionBegin; 640 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 641 642 /* make sure all processors are either in INSERTMODE or ADDMODE */ 643 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 644 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 645 mat->insertmode = addv; /* in case this processor had no cache */ 646 647 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 648 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 649 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 656 { 657 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 659 PetscErrorCode ierr; 660 PetscMPIInt n; 661 PetscInt i,j,rstart,ncols,flg; 662 PetscInt *row,*col; 663 PetscBool other_disassembled; 664 PetscScalar *val; 665 InsertMode addv = mat->insertmode; 666 667 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 668 669 PetscFunctionBegin; 670 if (!aij->donotstash && !mat->nooffprocentries) { 671 while (1) { 672 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 673 if (!flg) break; 674 675 for (i=0; i<n; ) { 676 /* Now identify the consecutive vals belonging to the same row */ 677 for (j=i,rstart=row[j]; j<n; j++) { 678 if (row[j] != rstart) break; 679 } 680 if (j < n) ncols = j-i; 681 else ncols = n-i; 682 /* Now assemble all these values with a single function call */ 683 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 684 685 i = j; 686 } 687 } 688 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 689 } 690 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 691 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 692 693 /* determine if any processor has disassembled, if so we must 694 also disassemble ourselfs, in order that we may reassemble. 
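     ("Disassembling" means converting the off-diagonal block B back to global
     column numbering so that entries in new off-process columns can be inserted
     before the matrix is assembled again.)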
*/ 695 /* 696 if nonzero structure of submatrix B cannot change then we know that 697 no processor disassembled thus we can skip this stuff 698 */ 699 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 700 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 701 if (mat->was_assembled && !other_disassembled) { 702 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 } 705 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 706 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 709 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 711 712 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 713 714 aij->rowvalues = 0; 715 716 /* used by MatAXPY() */ 717 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 718 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 719 720 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 721 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 727 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 728 { 729 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 730 PetscErrorCode ierr; 731 732 PetscFunctionBegin; 733 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 734 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatZeroRows_MPIAIJ" 740 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 741 { 742 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 743 PetscInt *owners = A->rmap->range; 744 PetscInt n = A->rmap->n; 745 PetscMPIInt size = mat->size; 746 PetscSF sf; 747 PetscInt *lrows; 748 PetscSFNode *rrows; 749 PetscInt lastidx = -1, r, p = 0, len = 0; 750 PetscErrorCode ierr; 751 752 PetscFunctionBegin; 753 /* Create SF where leaves are input rows and roots are owned rows */ 754 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 755 for (r = 0; r < n; ++r) lrows[r] = -1; 756 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 757 for (r = 0; r < N; ++r) { 758 const PetscInt idx = rows[r]; 759 PetscBool found = PETSC_FALSE; 760 /* Trick for efficient searching for sorted rows */ 761 if (lastidx > idx) p = 0; 762 lastidx = idx; 763 for (; p < size; ++p) { 764 if (idx >= owners[p] && idx < owners[p+1]) { 765 rrows[r].rank = p; 766 rrows[r].index = rows[r] - owners[p]; 767 found = PETSC_TRUE; 768 break; 769 } 770 } 771 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 772 } 773 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 774 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 775 /* Collect flags for rows to be zeroed */ 776 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 777 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 778 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 779 /* Compress and put in row numbers */ 780 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 781 /* fix right hand side if needed */ 782 if (x && b) { 783 const PetscScalar *xx; 784 PetscScalar *bb; 785 786 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 787 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 788 for 
(r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 789 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 790 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 791 } 792 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 793 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 794 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 795 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 796 } else if (diag != 0.0) { 797 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 798 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 799 for (r = 0; r < len; ++r) { 800 const PetscInt row = lrows[r] + A->rmap->rstart; 801 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 802 } 803 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 804 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 805 } else { 806 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 807 } 808 ierr = PetscFree(lrows);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 814 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 815 { 816 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 817 PetscErrorCode ierr; 818 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 819 PetscInt i,j,r,m,p = 0,len = 0; 820 PetscInt *lrows,*owners = A->rmap->range; 821 PetscSFNode *rrows; 822 PetscSF sf; 823 const PetscScalar *xx; 824 PetscScalar *bb,*mask; 825 Vec xmask,lmask; 826 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 827 const PetscInt *aj, *ii,*ridx; 828 PetscScalar *aa; 829 #if defined(PETSC_DEBUG) 830 PetscBool found = PETSC_FALSE; 831 #endif 832 833 PetscFunctionBegin; 834 /* Create SF where leaves are input rows and roots are owned rows */ 835 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 836 for (r = 0; r < n; ++r) lrows[r] = -1; 837 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 838 for (r = 0; r < N; ++r) { 839 const PetscInt idx = rows[r]; 840 PetscBool found = PETSC_FALSE; 841 /* Trick for efficient searching for sorted rows */ 842 if (lastidx > idx) p = 0; 843 lastidx = idx; 844 for (; p < size; ++p) { 845 if (idx >= owners[p] && idx < owners[p+1]) { 846 rrows[r].rank = p; 847 rrows[r].index = rows[r] - owners[p]; 848 found = PETSC_TRUE; 849 break; 850 } 851 } 852 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 853 } 854 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 855 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 856 /* Collect flags for rows to be zeroed */ 857 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 858 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 859 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 860 /* Compress and put in row numbers */ 861 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 862 /* zero diagonal part of matrix */ 863 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 864 /* handle off diagonal part of matrix */ 865 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 866 ierr = 
VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 867 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 868 for (i=0; i<len; i++) bb[lrows[i]] = 1; 869 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 870 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 873 if (x) { 874 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 876 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 877 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 878 } 879 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 880 /* remove zeroed rows of off diagonal matrix */ 881 ii = aij->i; 882 for (i=0; i<len; i++) { 883 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 884 } 885 /* loop over all elements of off process part of matrix zeroing removed columns*/ 886 if (aij->compressedrow.use) { 887 m = aij->compressedrow.nrows; 888 ii = aij->compressedrow.i; 889 ridx = aij->compressedrow.rindex; 890 for (i=0; i<m; i++) { 891 n = ii[i+1] - ii[i]; 892 aj = aij->j + ii[i]; 893 aa = aij->a + ii[i]; 894 895 for (j=0; j<n; j++) { 896 if (PetscAbsScalar(mask[*aj])) { 897 if (b) bb[*ridx] -= *aa*xx[*aj]; 898 *aa = 0.0; 899 } 900 aa++; 901 aj++; 902 } 903 ridx++; 904 } 905 } else { /* do not use compressed row format */ 906 m = l->B->rmap->n; 907 for (i=0; i<m; i++) { 908 n = ii[i+1] - ii[i]; 909 aj = aij->j + ii[i]; 910 aa = aij->a + ii[i]; 911 for (j=0; j<n; j++) { 912 if (PetscAbsScalar(mask[*aj])) { 913 if (b) bb[i] -= *aa*xx[*aj]; 914 *aa = 0.0; 915 } 916 aa++; 917 aj++; 918 } 919 } 920 } 921 if (x) { 922 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 923 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 924 } 925 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 926 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 927 ierr = PetscFree(lrows);CHKERRQ(ierr); 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatMult_MPIAIJ" 933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 934 { 935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt nt; 938 939 PetscFunctionBegin; 940 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 941 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 942 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 944 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 951 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 952 { 953 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 #undef __FUNCT__ 962 #define __FUNCT__ "MatMultAdd_MPIAIJ" 963 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 964 { 965 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 966 PetscErrorCode ierr; 967 968 PetscFunctionBegin; 969 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 970 ierr = 
(*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 971 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 #undef __FUNCT__ 977 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 978 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 979 { 980 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 981 PetscErrorCode ierr; 982 PetscBool merged; 983 984 PetscFunctionBegin; 985 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 986 /* do nondiagonal part */ 987 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 988 if (!merged) { 989 /* send it on its way */ 990 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 991 /* do local part */ 992 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 993 /* receive remote parts: note this assumes the values are not actually */ 994 /* added in yy until the next line, */ 995 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 996 } else { 997 /* do local part */ 998 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 999 /* send it on its way */ 1000 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1001 /* values actually were received in the Begin() but we need to call this nop */ 1002 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1003 } 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1009 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1010 { 1011 MPI_Comm comm; 1012 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1013 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1014 IS Me,Notme; 1015 PetscErrorCode ierr; 1016 PetscInt M,N,first,last,*notme,i; 1017 PetscMPIInt size; 1018 1019 PetscFunctionBegin; 1020 /* Easy test: symmetric diagonal block */ 1021 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1022 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1023 if (!*f) PetscFunctionReturn(0); 1024 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1025 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1026 if (size == 1) PetscFunctionReturn(0); 1027 1028 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
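      Each process extracts its owned rows restricted to the columns it does not own
      (Me x Notme) from Amat, the reversed block (Notme x Me) from Bmat, and compares
      the two sequential submatrices with MatIsTranspose().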
*/ 1029 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1030 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1031 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1032 for (i=0; i<first; i++) notme[i] = i; 1033 for (i=last; i<M; i++) notme[i-last+first] = i; 1034 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1035 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1036 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1037 Aoff = Aoffs[0]; 1038 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1039 Boff = Boffs[0]; 1040 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1041 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1042 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1043 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1044 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1045 ierr = PetscFree(notme);CHKERRQ(ierr); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1051 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1052 { 1053 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1054 PetscErrorCode ierr; 1055 1056 PetscFunctionBegin; 1057 /* do nondiagonal part */ 1058 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1059 /* send it on its way */ 1060 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1061 /* do local part */ 1062 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1063 /* receive remote parts */ 1064 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1065 PetscFunctionReturn(0); 1066 } 1067 1068 /* 1069 This only works correctly for square matrices where the subblock A->A is the 1070 diagonal block 1071 */ 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1074 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1075 { 1076 PetscErrorCode ierr; 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 1079 PetscFunctionBegin; 1080 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1081 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1082 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatScale_MPIAIJ" 1088 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1089 { 1090 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1091 PetscErrorCode ierr; 1092 1093 PetscFunctionBegin; 1094 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1095 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatDestroy_MPIAIJ" 1101 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1102 { 1103 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1104 PetscErrorCode ierr; 1105 1106 PetscFunctionBegin; 1107 #if defined(PETSC_USE_LOG) 1108 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1109 #endif 1110 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1111 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1112 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1113 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1114 #if defined(PETSC_USE_CTABLE) 1115 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1116 #else 1117 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1118 #endif 1119 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1120 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1121 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1122 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1123 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1124 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1125 1126 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1133 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1134 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1140 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1141 { 1142 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1143 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1144 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1145 PetscErrorCode ierr; 1146 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1147 int fd; 1148 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1149 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1150 PetscScalar *column_values; 1151 PetscInt message_count,flowcontrolcount; 1152 FILE *file; 1153 1154 PetscFunctionBegin; 1155 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1156 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1157 nz = A->nz + B->nz; 1158 if (!rank) { 1159 header[0] = MAT_FILE_CLASSID; 1160 header[1] = mat->rmap->N; 1161 header[2] = mat->cmap->N; 1162 1163 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1164 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1165 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1166 /* get largest number of rows any processor has */ 1167 rlen = mat->rmap->n; 1168 range = mat->rmap->range; 1169 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1170 } else { 1171 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1172 rlen = mat->rmap->n; 1173 } 1174 1175 /* load up the local row counts */ 1176 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1177 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1178 1179 /* store the row lengths to the file */ 1180 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1181 if (!rank) { 1182 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1183 for (i=1; i<size; i++) { 1184 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1185 rlen = range[i+1] - range[i]; 1186 ierr = 
MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1187 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1188 } 1189 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1190 } else { 1191 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1192 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1193 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1194 } 1195 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1196 1197 /* load up the local column indices */ 1198 nzmax = nz; /* th processor needs space a largest processor needs */ 1199 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1200 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1201 cnt = 0; 1202 for (i=0; i<mat->rmap->n; i++) { 1203 for (j=B->i[i]; j<B->i[i+1]; j++) { 1204 if ((col = garray[B->j[j]]) > cstart) break; 1205 column_indices[cnt++] = col; 1206 } 1207 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1208 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1209 } 1210 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1211 1212 /* store the column indices to the file */ 1213 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1214 if (!rank) { 1215 MPI_Status status; 1216 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1217 for (i=1; i<size; i++) { 1218 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1219 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1220 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1221 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1222 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1223 } 1224 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1225 } else { 1226 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1227 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1228 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1229 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1230 } 1231 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1232 1233 /* load up the local column values */ 1234 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1235 cnt = 0; 1236 for (i=0; i<mat->rmap->n; i++) { 1237 for (j=B->i[i]; j<B->i[i+1]; j++) { 1238 if (garray[B->j[j]] > cstart) break; 1239 column_values[cnt++] = B->a[j]; 1240 } 1241 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1242 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1243 } 1244 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1245 1246 /* store the column values to the file */ 1247 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1248 if (!rank) { 1249 MPI_Status status; 1250 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1251 for (i=1; i<size; i++) { 1252 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1253 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1254 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1255 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1256 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1257 } 1258 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1259 } else { 1260 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1261 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1262 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1263 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1264 } 1265 ierr = PetscFree(column_values);CHKERRQ(ierr); 1266 1267 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1268 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 #include <petscdraw.h> 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1275 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1276 { 1277 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1278 PetscErrorCode ierr; 1279 PetscMPIInt rank = aij->rank,size = aij->size; 1280 PetscBool isdraw,iascii,isbinary; 1281 PetscViewer sviewer; 1282 PetscViewerFormat format; 1283 1284 PetscFunctionBegin; 1285 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1286 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1287 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1288 if (iascii) { 1289 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1290 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1291 MatInfo info; 1292 PetscBool inodes; 1293 1294 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1295 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1296 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1297 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1298 if (!inodes) { 1299 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1300 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1301 } else { 1302 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1303 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1304 } 1305 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1306 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1307 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1308 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1309 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 
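 /* flush all pending synchronized output before synchronized printing is turned off on this viewer */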
1310 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1312 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1315 PetscInt inodecount,inodelimit,*inodes; 1316 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1317 if (inodes) { 1318 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1319 } else { 1320 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1321 } 1322 PetscFunctionReturn(0); 1323 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1324 PetscFunctionReturn(0); 1325 } 1326 } else if (isbinary) { 1327 if (size == 1) { 1328 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1329 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1330 } else { 1331 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1332 } 1333 PetscFunctionReturn(0); 1334 } else if (isdraw) { 1335 PetscDraw draw; 1336 PetscBool isnull; 1337 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1338 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1339 } 1340 1341 if (size == 1) { 1342 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1343 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1344 } else { 1345 /* assemble the entire matrix onto first processor. */ 1346 Mat A; 1347 Mat_SeqAIJ *Aloc; 1348 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1349 MatScalar *a; 1350 1351 if (mat->rmap->N > 1024) { 1352 PetscBool flg = PETSC_FALSE; 1353 1354 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1355 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1356 } 1357 1358 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1359 if (!rank) { 1360 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1361 } else { 1362 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1363 } 1364 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1365 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1366 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1367 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1368 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1369 1370 /* copy over the A part */ 1371 Aloc = (Mat_SeqAIJ*)aij->A->data; 1372 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1373 row = mat->rmap->rstart; 1374 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1375 for (i=0; i<m; i++) { 1376 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1377 row++; 1378 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1379 } 1380 aj = Aloc->j; 1381 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1382 1383 /* copy over the B part */ 1384 Aloc = (Mat_SeqAIJ*)aij->B->data; 1385 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1386 row = mat->rmap->rstart; 1387 ierr = 
PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1388 ct = cols; 1389 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1390 for (i=0; i<m; i++) { 1391 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1392 row++; 1393 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1394 } 1395 ierr = PetscFree(ct);CHKERRQ(ierr); 1396 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1398 /* 1399 Everyone has to call to draw the matrix since the graphics waits are 1400 synchronized across all processors that share the PetscDraw object 1401 */ 1402 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1403 if (!rank) { 1404 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1405 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1406 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1407 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1408 } 1409 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1410 ierr = MatDestroy(&A);CHKERRQ(ierr); 1411 } 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatView_MPIAIJ" 1417 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1418 { 1419 PetscErrorCode ierr; 1420 PetscBool iascii,isdraw,issocket,isbinary; 1421 1422 PetscFunctionBegin; 1423 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1424 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1425 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1426 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1427 if (iascii || isdraw || isbinary || issocket) { 1428 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatSOR_MPIAIJ" 1435 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1436 { 1437 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1438 PetscErrorCode ierr; 1439 Vec bb1 = 0; 1440 PetscBool hasop; 1441 1442 PetscFunctionBegin; 1443 if (flag == SOR_APPLY_UPPER) { 1444 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1449 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1450 } 1451 1452 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1453 if (flag & SOR_ZERO_INITIAL_GUESS) { 1454 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1455 its--; 1456 } 1457 1458 while (its--) { 1459 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1460 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1461 1462 /* update rhs: bb1 = bb - B*x */ 1463 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1464 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1465 1466 /* local sweep */ 1467 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1468 } 1469 } else if (flag & 
SOR_LOCAL_FORWARD_SWEEP) { 1470 if (flag & SOR_ZERO_INITIAL_GUESS) { 1471 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1472 its--; 1473 } 1474 while (its--) { 1475 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1476 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1477 1478 /* update rhs: bb1 = bb - B*x */ 1479 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1480 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1481 1482 /* local sweep */ 1483 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1484 } 1485 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1486 if (flag & SOR_ZERO_INITIAL_GUESS) { 1487 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1488 its--; 1489 } 1490 while (its--) { 1491 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1492 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1493 1494 /* update rhs: bb1 = bb - B*x */ 1495 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1496 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1497 1498 /* local sweep */ 1499 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1500 } 1501 } else if (flag & SOR_EISENSTAT) { 1502 Vec xx1; 1503 1504 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1505 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1506 1507 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1508 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1509 if (!mat->diag) { 1510 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1511 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1512 } 1513 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1514 if (hasop) { 1515 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1516 } else { 1517 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1518 } 1519 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1520 1521 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1522 1523 /* local sweep */ 1524 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1525 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1526 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1527 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1528 1529 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatPermute_MPIAIJ" 1535 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1536 { 1537 Mat aA,aB,Aperm; 1538 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1539 PetscScalar *aa,*ba; 1540 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1541 PetscSF rowsf,sf; 1542 IS parcolp = NULL; 1543 PetscBool done; 1544 PetscErrorCode ierr; 1545 1546 PetscFunctionBegin; 1547 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1548 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1549 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1550 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1551 1552 
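  /*
     Sketch of the permutation convention assumed below (consistent with how rdest/cdest are
     later passed to MatSetValues() on Aperm): rowp[i] and colp[i] name the old global row and
     column that become row and column i of the permuted matrix.  For example, on a single
     process with three rows and rowp = {2,0,1}, the PetscSF reduce below produces
     rdest = {1,2,0}, i.e. local row 0 of A is inserted as row 1 of Aperm.
  */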
/* Invert row permutation to find out where my rows should go */ 1553 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1554 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1555 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1556 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1557 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1558 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1559 1560 /* Invert column permutation to find out where my columns should go */ 1561 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1562 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1563 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1564 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1565 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1566 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1567 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1568 1569 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1570 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1571 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1572 1573 /* Find out where my gcols should go */ 1574 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1575 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1576 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1577 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1578 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1579 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1580 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1581 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1582 1583 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1584 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1585 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1586 for (i=0; i<m; i++) { 1587 PetscInt row = rdest[i],rowner; 1588 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1589 for (j=ai[i]; j<ai[i+1]; j++) { 1590 PetscInt cowner,col = cdest[aj[j]]; 1591 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1592 if (rowner == cowner) dnnz[i]++; 1593 else onnz[i]++; 1594 } 1595 for (j=bi[i]; j<bi[i+1]; j++) { 1596 PetscInt cowner,col = gcdest[bj[j]]; 1597 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1598 if (rowner == cowner) dnnz[i]++; 1599 else onnz[i]++; 1600 } 1601 } 1602 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1603 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1604 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1605 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1606 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1607 1608 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1609 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1610 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1611 for (i=0; i<m; i++) { 1612 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1613 PetscInt rowlen; 1614 rowlen = ai[i+1] - ai[i]; 1615 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 
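    /* at this point acols[0..rowlen-1] holds the permuted global column indices for the
       diagonal-block entries of local row i; the off-diagonal block is handled the same way
       below using the pre-translated ghost-column map gcdest */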
1616 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1617 rowlen = bi[i+1] - bi[i]; 1618 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1619 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1620 } 1621 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1622 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1623 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1624 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1625 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1626 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1627 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1628 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1629 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1630 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1631 *B = Aperm; 1632 PetscFunctionReturn(0); 1633 } 1634 1635 #undef __FUNCT__ 1636 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1637 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1638 { 1639 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1640 Mat A = mat->A,B = mat->B; 1641 PetscErrorCode ierr; 1642 PetscReal isend[5],irecv[5]; 1643 1644 PetscFunctionBegin; 1645 info->block_size = 1.0; 1646 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1647 1648 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1649 isend[3] = info->memory; isend[4] = info->mallocs; 1650 1651 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1652 1653 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1654 isend[3] += info->memory; isend[4] += info->mallocs; 1655 if (flag == MAT_LOCAL) { 1656 info->nz_used = isend[0]; 1657 info->nz_allocated = isend[1]; 1658 info->nz_unneeded = isend[2]; 1659 info->memory = isend[3]; 1660 info->mallocs = isend[4]; 1661 } else if (flag == MAT_GLOBAL_MAX) { 1662 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1663 1664 info->nz_used = irecv[0]; 1665 info->nz_allocated = irecv[1]; 1666 info->nz_unneeded = irecv[2]; 1667 info->memory = irecv[3]; 1668 info->mallocs = irecv[4]; 1669 } else if (flag == MAT_GLOBAL_SUM) { 1670 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1671 1672 info->nz_used = irecv[0]; 1673 info->nz_allocated = irecv[1]; 1674 info->nz_unneeded = irecv[2]; 1675 info->memory = irecv[3]; 1676 info->mallocs = irecv[4]; 1677 } 1678 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1679 info->fill_ratio_needed = 0; 1680 info->factor_mallocs = 0; 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatSetOption_MPIAIJ" 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 PetscErrorCode ierr; 1690 1691 PetscFunctionBegin; 1692 switch (op) { 1693 case MAT_NEW_NONZERO_LOCATIONS: 1694 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1695 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1696 case MAT_KEEP_NONZERO_PATTERN: 1697 case MAT_NEW_NONZERO_LOCATION_ERR: 1698 case MAT_USE_INODES: 1699 case MAT_IGNORE_ZERO_ENTRIES: 1700 MatCheckPreallocated(A,1); 1701 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1702 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1703 break; 1704 case MAT_ROW_ORIENTED: 1705 a->roworiented = flg; 1706 1707 ierr = 
MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1708 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1709 break; 1710 case MAT_NEW_DIAGONALS: 1711 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1712 break; 1713 case MAT_IGNORE_OFF_PROC_ENTRIES: 1714 a->donotstash = flg; 1715 break; 1716 case MAT_SPD: 1717 A->spd_set = PETSC_TRUE; 1718 A->spd = flg; 1719 if (flg) { 1720 A->symmetric = PETSC_TRUE; 1721 A->structurally_symmetric = PETSC_TRUE; 1722 A->symmetric_set = PETSC_TRUE; 1723 A->structurally_symmetric_set = PETSC_TRUE; 1724 } 1725 break; 1726 case MAT_SYMMETRIC: 1727 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1728 break; 1729 case MAT_STRUCTURALLY_SYMMETRIC: 1730 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1731 break; 1732 case MAT_HERMITIAN: 1733 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1734 break; 1735 case MAT_SYMMETRY_ETERNAL: 1736 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1737 break; 1738 default: 1739 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatGetRow_MPIAIJ" 1746 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1747 { 1748 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1749 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1750 PetscErrorCode ierr; 1751 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1752 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1753 PetscInt *cmap,*idx_p; 1754 1755 PetscFunctionBegin; 1756 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1757 mat->getrowactive = PETSC_TRUE; 1758 1759 if (!mat->rowvalues && (idx || v)) { 1760 /* 1761 allocate enough space to hold information from the longest row. 
1762 */ 1763 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1764 PetscInt max = 1,tmp; 1765 for (i=0; i<matin->rmap->n; i++) { 1766 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1767 if (max < tmp) max = tmp; 1768 } 1769 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1770 } 1771 1772 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1773 lrow = row - rstart; 1774 1775 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1776 if (!v) {pvA = 0; pvB = 0;} 1777 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1778 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1779 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1780 nztot = nzA + nzB; 1781 1782 cmap = mat->garray; 1783 if (v || idx) { 1784 if (nztot) { 1785 /* Sort by increasing column numbers, assuming A and B already sorted */ 1786 PetscInt imark = -1; 1787 if (v) { 1788 *v = v_p = mat->rowvalues; 1789 for (i=0; i<nzB; i++) { 1790 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1791 else break; 1792 } 1793 imark = i; 1794 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1795 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1796 } 1797 if (idx) { 1798 *idx = idx_p = mat->rowindices; 1799 if (imark > -1) { 1800 for (i=0; i<imark; i++) { 1801 idx_p[i] = cmap[cworkB[i]]; 1802 } 1803 } else { 1804 for (i=0; i<nzB; i++) { 1805 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1806 else break; 1807 } 1808 imark = i; 1809 } 1810 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1811 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1812 } 1813 } else { 1814 if (idx) *idx = 0; 1815 if (v) *v = 0; 1816 } 1817 } 1818 *nz = nztot; 1819 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1820 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1826 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1827 { 1828 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1829 1830 PetscFunctionBegin; 1831 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1832 aij->getrowactive = PETSC_FALSE; 1833 PetscFunctionReturn(0); 1834 } 1835 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatNorm_MPIAIJ" 1838 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1839 { 1840 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1841 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1842 PetscErrorCode ierr; 1843 PetscInt i,j,cstart = mat->cmap->rstart; 1844 PetscReal sum = 0.0; 1845 MatScalar *v; 1846 1847 PetscFunctionBegin; 1848 if (aij->size == 1) { 1849 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1850 } else { 1851 if (type == NORM_FROBENIUS) { 1852 v = amat->a; 1853 for (i=0; i<amat->nz; i++) { 1854 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1855 } 1856 v = bmat->a; 1857 for (i=0; i<bmat->nz; i++) { 1858 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1859 } 1860 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1861 *norm = PetscSqrtReal(*norm); 1862 } else if (type == NORM_1) { /* max column norm */ 1863 PetscReal *tmp,*tmp2; 1864 PetscInt *jj,*garray = aij->garray; 1865 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1866 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1867 *norm = 0.0; 1868 v = amat->a; jj = amat->j; 1869 for (j=0; j<amat->nz; j++) { 1870 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1871 } 1872 v = bmat->a; jj = bmat->j; 1873 for (j=0; j<bmat->nz; j++) { 1874 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1875 } 1876 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1877 for (j=0; j<mat->cmap->N; j++) { 1878 if (tmp2[j] > *norm) *norm = tmp2[j]; 1879 } 1880 ierr = PetscFree(tmp);CHKERRQ(ierr); 1881 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1882 } else if (type == NORM_INFINITY) { /* max row norm */ 1883 PetscReal ntemp = 0.0; 1884 for (j=0; j<aij->A->rmap->n; j++) { 1885 v = amat->a + amat->i[j]; 1886 sum = 0.0; 1887 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1888 sum += PetscAbsScalar(*v); v++; 1889 } 1890 v = bmat->a + bmat->i[j]; 1891 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1892 sum += PetscAbsScalar(*v); v++; 1893 } 1894 if (sum > ntemp) ntemp = sum; 1895 } 1896 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1897 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #undef __FUNCT__ 1903 #define __FUNCT__ "MatTranspose_MPIAIJ" 1904 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1905 { 1906 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1907 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1908 PetscErrorCode ierr; 1909 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1910 PetscInt cstart = A->cmap->rstart,ncol; 1911 Mat B; 1912 MatScalar *array; 1913 1914 PetscFunctionBegin; 1915 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1916 1917 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1918 ai = Aloc->i; aj = Aloc->j; 1919 bi = Bloc->i; bj = Bloc->j; 1920 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1921 PetscInt *d_nnz,*g_nnz,*o_nnz; 1922 PetscSFNode *oloc; 1923 PETSC_UNUSED PetscSF sf; 1924 1925 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1926 /* compute d_nnz for preallocation */ 1927 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1928 for (i=0; i<ai[ma]; i++) { 1929 d_nnz[aj[i]]++; 1930 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1931 } 1932 /* compute local off-diagonal contributions */ 1933 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1934 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1935 /* map those to global */ 1936 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1937 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1938 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1939 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1940 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1941 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1942 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1943 1944 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1945 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1946 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 1947 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1948 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1949 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1950 } else { 1951 B = *matout; 1952 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1953 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1954 } 1955 1956 /* copy over the A part */ 1957 array = Aloc->a; 1958 row = A->rmap->rstart; 1959 for (i=0; i<ma; i++) { 1960 ncol = ai[i+1]-ai[i]; 1961 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1962 row++; 1963 array += ncol; aj += ncol; 1964 } 1965 aj = Aloc->j; 1966 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1967 1968 /* copy over the B part */ 1969 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1970 array = Bloc->a; 1971 row = A->rmap->rstart; 1972 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1973 cols_tmp = cols; 1974 for (i=0; i<mb; i++) { 1975 ncol = bi[i+1]-bi[i]; 1976 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1977 row++; 1978 array += ncol; cols_tmp += ncol; 1979 } 1980 ierr = PetscFree(cols);CHKERRQ(ierr); 1981 1982 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1983 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1984 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1985 *matout = B; 1986 } else { 1987 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1988 } 1989 PetscFunctionReturn(0); 1990 } 1991 1992 #undef __FUNCT__ 1993 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1994 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1995 { 1996 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1997 Mat a = aij->A,b = aij->B; 1998 PetscErrorCode ierr; 1999 PetscInt s1,s2,s3; 2000 2001 PetscFunctionBegin; 2002 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2003 if (rr) { 2004 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2005 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2006 /* Overlap communication with computation. 
*/ 2007 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2008 } 2009 if (ll) { 2010 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2011 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2012 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2013 } 2014 /* scale the diagonal block */ 2015 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2016 2017 if (rr) { 2018 /* Do a scatter end and then right scale the off-diagonal block */ 2019 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2020 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2021 } 2022 PetscFunctionReturn(0); 2023 } 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2027 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2028 { 2029 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2030 PetscErrorCode ierr; 2031 2032 PetscFunctionBegin; 2033 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatEqual_MPIAIJ" 2039 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2040 { 2041 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2042 Mat a,b,c,d; 2043 PetscBool flg; 2044 PetscErrorCode ierr; 2045 2046 PetscFunctionBegin; 2047 a = matA->A; b = matA->B; 2048 c = matB->A; d = matB->B; 2049 2050 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2051 if (flg) { 2052 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2053 } 2054 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2055 PetscFunctionReturn(0); 2056 } 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "MatCopy_MPIAIJ" 2060 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2061 { 2062 PetscErrorCode ierr; 2063 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2064 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2065 2066 PetscFunctionBegin; 2067 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2068 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2069 /* because of the column compression in the off-processor part of the matrix a->B, 2070 the number of columns in a->B and b->B may be different, hence we cannot call 2071 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2072 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2073 then copying the submatrices */ 2074 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2075 } else { 2076 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2077 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2078 } 2079 PetscFunctionReturn(0); 2080 } 2081 2082 #undef __FUNCT__ 2083 #define __FUNCT__ "MatSetUp_MPIAIJ" 2084 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2085 { 2086 PetscErrorCode ierr; 2087 2088 PetscFunctionBegin; 2089 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2090 PetscFunctionReturn(0); 2091 } 2092 2093 #undef __FUNCT__ 2094 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2095 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2096 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2097 { 2098 PetscInt i,m=Y->rmap->N; 2099 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2100 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2101 const PetscInt *xi = x->i,*yi = y->i; 2102 2103 PetscFunctionBegin; 2104 /* Set the number of nonzeros in the new matrix */ 2105 for (i=0; i<m; i++) { 2106 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2107 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2108 nnz[i] = 0; 2109 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2110 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2111 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2112 nnz[i]++; 2113 } 2114 for (; k<nzy; k++) nnz[i]++; 2115 } 2116 PetscFunctionReturn(0); 2117 } 2118 2119 #undef __FUNCT__ 2120 #define __FUNCT__ "MatAXPY_MPIAIJ" 2121 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2122 { 2123 PetscErrorCode ierr; 2124 PetscInt i; 2125 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2126 PetscBLASInt bnz,one=1; 2127 Mat_SeqAIJ *x,*y; 2128 2129 PetscFunctionBegin; 2130 if (str == SAME_NONZERO_PATTERN) { 2131 PetscScalar alpha = a; 2132 x = (Mat_SeqAIJ*)xx->A->data; 2133 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2134 y = (Mat_SeqAIJ*)yy->A->data; 2135 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2136 x = (Mat_SeqAIJ*)xx->B->data; 2137 y = (Mat_SeqAIJ*)yy->B->data; 2138 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2139 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2140 } else if (str == SUBSET_NONZERO_PATTERN) { 2141 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2142 2143 x = (Mat_SeqAIJ*)xx->B->data; 2144 y = (Mat_SeqAIJ*)yy->B->data; 2145 if (y->xtoy && y->XtoY != xx->B) { 2146 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2147 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2148 } 2149 if (!y->xtoy) { /* get xtoy */ 2150 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2151 y->XtoY = xx->B; 2152 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2153 } 2154 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2155 } else { 2156 Mat B; 2157 PetscInt *nnz_d,*nnz_o; 2158 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2159 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2160 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2161 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2162 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2163 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2164 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2165 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2166 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2167 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2168 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2169 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2170 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2171 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2172 } 2173 PetscFunctionReturn(0); 2174 } 2175 2176 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2177 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "MatConjugate_MPIAIJ" 2180 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2181 { 2182 #if defined(PETSC_USE_COMPLEX) 2183 PetscErrorCode ierr; 2184 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2185 2186 PetscFunctionBegin; 2187 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2188 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2189 #else 2190 PetscFunctionBegin; 2191 #endif 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatRealPart_MPIAIJ" 2197 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2198 { 2199 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2204 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2205 PetscFunctionReturn(0); 2206 } 2207 2208 #undef __FUNCT__ 2209 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2210 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2211 { 2212 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2213 PetscErrorCode ierr; 2214 2215 PetscFunctionBegin; 2216 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2217 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 #if defined(PETSC_HAVE_PBGL) 2222 2223 #include <boost/parallel/mpi/bsp_process_group.hpp> 2224 #include <boost/graph/distributed/ilu_default_graph.hpp> 2225 #include <boost/graph/distributed/ilu_0_block.hpp> 2226 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2227 #include <boost/graph/distributed/petsc/interface.hpp> 2228 #include <boost/multi_array.hpp> 2229 #include <boost/parallel/distributed_property_map->hpp> 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2233 /* 2234 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2235 */ 2236 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2237 { 2238 namespace petsc = boost::distributed::petsc; 2239 2240 namespace graph_dist = boost::graph::distributed; 2241 using boost::graph::distributed::ilu_default::process_group_type; 2242 using boost::graph::ilu_permuted; 2243 2244 PetscBool row_identity, col_identity; 2245 PetscContainer c; 2246 PetscInt m, n, M, N; 2247 PetscErrorCode ierr; 2248 2249 PetscFunctionBegin; 2250 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2251 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2252 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2253 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2254 2255 process_group_type pg; 2256 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2257 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2258 lgraph_type& level_graph = *lgraph_p; 2259 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2260 2261 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2262 ilu_permuted(level_graph); 2263 2264 /* put together the new matrix */ 2265 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2266 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2267 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2268 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2269 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2270 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2271 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2272 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2273 2274 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2275 ierr = PetscContainerSetPointer(c, lgraph_p); 2276 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2277 ierr = PetscContainerDestroy(&c); 2278 PetscFunctionReturn(0); 2279 } 2280 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2283 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2284 { 2285 PetscFunctionBegin; 2286 PetscFunctionReturn(0); 2287 } 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatSolve_MPIAIJ" 2291 /* 2292 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2293 */ 2294 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2295 { 2296 namespace graph_dist = boost::graph::distributed; 2297 2298 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2299 lgraph_type *lgraph_p; 2300 PetscContainer c; 2301 PetscErrorCode ierr; 2302 2303 PetscFunctionBegin; 2304 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2305 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2306 ierr = VecCopy(b, x);CHKERRQ(ierr); 2307 2308 PetscScalar *array_x; 2309 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2310 PetscInt sx; 2311 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2312 2313 PetscScalar *array_b; 2314 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2315 PetscInt sb; 2316 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2317 2318 lgraph_type& level_graph = *lgraph_p; 2319 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2320 2321 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2322 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2323 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2324 2325 typedef boost::iterator_property_map<array_ref_type::iterator, 2326 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2327 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2328 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2329 2330 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2331 PetscFunctionReturn(0); 2332 } 2333 #endif 2334 2335 #undef __FUNCT__ 2336 #define __FUNCT__ "MatDestroy_MatRedundant" 2337 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2338 { 2339 PetscErrorCode ierr; 2340 Mat_Redundant *redund; 2341 PetscInt i; 2342 PetscMPIInt size; 2343 2344 PetscFunctionBegin; 2345 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2346 if (size == 1) { 2347 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2348 redund = 
a->redundant; 2349 } else { 2350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2351 redund = a->redundant; 2352 } 2353 if (redund){ 2354 if (redund->matseq) { /* via MatGetSubMatrices() */ 2355 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2356 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2357 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2358 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2359 } else { 2360 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2361 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2362 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2363 for (i=0; i<redund->nrecvs; i++) { 2364 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2365 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2366 } 2367 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2368 } 2369 2370 if (redund->psubcomm) { 2371 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2372 } 2373 ierr = redund->Destroy(A);CHKERRQ(ierr); 2374 ierr = PetscFree(redund);CHKERRQ(ierr); 2375 } 2376 PetscFunctionReturn(0); 2377 } 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2381 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2382 { 2383 PetscMPIInt rank,size; 2384 MPI_Comm comm; 2385 PetscErrorCode ierr; 2386 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2387 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2388 PetscInt *rowrange = mat->rmap->range; 2389 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2390 Mat A = aij->A,B=aij->B,C=*matredundant; 2391 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2392 PetscScalar *sbuf_a; 2393 PetscInt nzlocal=a->nz+b->nz; 2394 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2395 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2396 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2397 MatScalar *aworkA,*aworkB; 2398 PetscScalar *vals; 2399 PetscMPIInt tag1,tag2,tag3,imdex; 2400 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2401 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2402 MPI_Status recv_status,*send_status; 2403 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2404 PetscInt **rbuf_j=NULL; 2405 PetscScalar **rbuf_a=NULL; 2406 Mat_Redundant *redund =NULL; 2407 2408 PetscFunctionBegin; 2409 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2410 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2411 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2412 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2413 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2414 2415 if (reuse == MAT_REUSE_MATRIX) { 2416 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2417 if (subsize == 1) { 2418 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2419 redund = c->redundant; 2420 } else { 2421 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2422 redund = c->redundant; 2423 } 2424 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2425 2426 nsends = redund->nsends; 2427 nrecvs = redund->nrecvs; 2428 send_rank = redund->send_rank; 2429 recv_rank = redund->recv_rank; 2430 sbuf_nz = redund->sbuf_nz; 2431 rbuf_nz = redund->rbuf_nz; 2432 sbuf_j = redund->sbuf_j; 2433 sbuf_a = redund->sbuf_a; 2434 rbuf_j = redund->rbuf_j; 2435 rbuf_a = redund->rbuf_a; 2436 } 2437 2438 if (reuse == MAT_INITIAL_MATRIX) { 2439 PetscInt nleftover,np_subcomm; 2440 2441 /* get the destination processors' id send_rank, nsends and nrecvs */ 2442 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2443 2444 np_subcomm = size/nsubcomm; 2445 nleftover = size - nsubcomm*np_subcomm; 2446 2447 /* block of codes below is specific for INTERLACED */ 2448 /* ------------------------------------------------*/ 2449 nsends = 0; nrecvs = 0; 2450 for (i=0; i<size; i++) { 2451 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2452 send_rank[nsends++] = i; 2453 recv_rank[nrecvs++] = i; 2454 } 2455 } 2456 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2457 i = size-nleftover-1; 2458 j = 0; 2459 while (j < nsubcomm - nleftover) { 2460 send_rank[nsends++] = i; 2461 i--; j++; 2462 } 2463 } 2464 2465 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2466 for (i=0; i<nleftover; i++) { 2467 recv_rank[nrecvs++] = size-nleftover+i; 2468 } 2469 } 2470 /*----------------------------------------------*/ 2471 2472 /* allocate sbuf_j, sbuf_a */ 2473 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2474 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2475 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2476 /* 2477 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2478 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2479 */ 2480 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2481 2482 /* copy mat's local entries into the buffers */ 2483 if (reuse == MAT_INITIAL_MATRIX) { 2484 rownz_max = 0; 2485 rptr = sbuf_j; 2486 cols = sbuf_j + rend-rstart + 1; 2487 vals = sbuf_a; 2488 rptr[0] = 0; 2489 for (i=0; i<rend-rstart; i++) { 2490 row = i + rstart; 2491 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2492 ncols = nzA + nzB; 2493 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2494 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2495 /* load the column indices for this row into cols */ 2496 lwrite = 0; 2497 for (l=0; l<nzB; l++) { 2498 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2499 vals[lwrite] = aworkB[l]; 2500 cols[lwrite++] = ctmp; 2501 } 2502 } 2503 for (l=0; l<nzA; l++) { 2504 vals[lwrite] = aworkA[l]; 2505 cols[lwrite++] = cstart + cworkA[l]; 2506 } 2507 for (l=0; l<nzB; l++) { 2508 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2509 vals[lwrite] = aworkB[l]; 2510 cols[lwrite++] = ctmp; 2511 } 2512 } 2513 vals += ncols; 2514 cols += ncols; 2515 rptr[i+1] = rptr[i] + ncols; 2516 if (rownz_max < ncols) rownz_max = ncols; 2517 } 2518 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2519 } else { /* only copy matrix values into sbuf_a */ 2520 rptr = sbuf_j; 2521 vals = sbuf_a; 2522 rptr[0] = 0; 2523 for (i=0; i<rend-rstart; i++) { 2524 row = i + rstart; 2525 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2526 ncols = nzA + nzB; 2527 cworkB = b->j + b->i[i]; 2528 aworkA = a->a + a->i[i]; 2529 aworkB = b->a + b->i[i]; 2530 lwrite = 0; 2531 for (l=0; 
l<nzB; l++) { 2532 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2533 } 2534 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2535 for (l=0; l<nzB; l++) { 2536 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2537 } 2538 vals += ncols; 2539 rptr[i+1] = rptr[i] + ncols; 2540 } 2541 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2542 2543 /* send nzlocal to others, and recv other's nzlocal */ 2544 /*--------------------------------------------------*/ 2545 if (reuse == MAT_INITIAL_MATRIX) { 2546 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2547 2548 s_waits2 = s_waits3 + nsends; 2549 s_waits1 = s_waits2 + nsends; 2550 r_waits1 = s_waits1 + nsends; 2551 r_waits2 = r_waits1 + nrecvs; 2552 r_waits3 = r_waits2 + nrecvs; 2553 } else { 2554 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2555 2556 r_waits3 = s_waits3 + nsends; 2557 } 2558 2559 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2560 if (reuse == MAT_INITIAL_MATRIX) { 2561 /* get new tags to keep the communication clean */ 2562 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2563 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2564 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2565 2566 /* post receives of other's nzlocal */ 2567 for (i=0; i<nrecvs; i++) { 2568 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2569 } 2570 /* send nzlocal to others */ 2571 for (i=0; i<nsends; i++) { 2572 sbuf_nz[i] = nzlocal; 2573 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2574 } 2575 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2576 count = nrecvs; 2577 while (count) { 2578 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2579 2580 recv_rank[imdex] = recv_status.MPI_SOURCE; 2581 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2582 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2583 2584 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2585 2586 rbuf_nz[imdex] += i + 2; 2587 2588 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2589 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2590 count--; 2591 } 2592 /* wait on sends of nzlocal */ 2593 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2594 /* send mat->i,j to others, and recv from other's */ 2595 /*------------------------------------------------*/ 2596 for (i=0; i<nsends; i++) { 2597 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2598 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2599 } 2600 /* wait on receives of mat->i,j */ 2601 /*------------------------------*/ 2602 count = nrecvs; 2603 while (count) { 2604 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2605 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2606 count--; 2607 } 2608 /* wait on sends of mat->i,j */ 2609 /*---------------------------*/ 2610 if (nsends) { 2611 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2612 } 2613 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2614 2615 /* post 
receives, send and receive mat->a */ 2616 /*----------------------------------------*/ 2617 for (imdex=0; imdex<nrecvs; imdex++) { 2618 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2619 } 2620 for (i=0; i<nsends; i++) { 2621 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2622 } 2623 count = nrecvs; 2624 while (count) { 2625 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2626 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2627 count--; 2628 } 2629 if (nsends) { 2630 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2631 } 2632 2633 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2634 2635 /* create redundant matrix */ 2636 /*-------------------------*/ 2637 if (reuse == MAT_INITIAL_MATRIX) { 2638 const PetscInt *range; 2639 PetscInt rstart_sub,rend_sub,mloc_sub; 2640 2641 /* compute rownz_max for preallocation */ 2642 for (imdex=0; imdex<nrecvs; imdex++) { 2643 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2644 rptr = rbuf_j[imdex]; 2645 for (i=0; i<j; i++) { 2646 ncols = rptr[i+1] - rptr[i]; 2647 if (rownz_max < ncols) rownz_max = ncols; 2648 } 2649 } 2650 2651 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2652 2653 /* get local size of redundant matrix 2654 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2655 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2656 rstart_sub = range[nsubcomm*subrank]; 2657 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2658 rend_sub = range[nsubcomm*(subrank+1)]; 2659 } else { 2660 rend_sub = mat->rmap->N; 2661 } 2662 mloc_sub = rend_sub - rstart_sub; 2663 2664 if (M == N) { 2665 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2666 } else { /* non-square matrix */ 2667 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2668 } 2669 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2670 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2671 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2672 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2673 } else { 2674 C = *matredundant; 2675 } 2676 2677 /* insert local matrix entries */ 2678 rptr = sbuf_j; 2679 cols = sbuf_j + rend-rstart + 1; 2680 vals = sbuf_a; 2681 for (i=0; i<rend-rstart; i++) { 2682 row = i + rstart; 2683 ncols = rptr[i+1] - rptr[i]; 2684 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2685 vals += ncols; 2686 cols += ncols; 2687 } 2688 /* insert received matrix entries */ 2689 for (imdex=0; imdex<nrecvs; imdex++) { 2690 rstart = rowrange[recv_rank[imdex]]; 2691 rend = rowrange[recv_rank[imdex]+1]; 2692 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2693 rptr = rbuf_j[imdex]; 2694 cols = rbuf_j[imdex] + rend-rstart + 1; 2695 vals = rbuf_a[imdex]; 2696 for (i=0; i<rend-rstart; i++) { 2697 row = i + rstart; 2698 ncols = rptr[i+1] - rptr[i]; 2699 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2700 vals += ncols; 2701 cols += ncols; 2702 } 2703 } 2704 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2705 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2706 2707 if (reuse == MAT_INITIAL_MATRIX) { 2708 *matredundant = C; 2709 2710 /* 
create a supporting struct and attach it to C for reuse */ 2711 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2712 if (subsize == 1) { 2713 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2714 c->redundant = redund; 2715 } else { 2716 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2717 c->redundant = redund; 2718 } 2719 2720 redund->nzlocal = nzlocal; 2721 redund->nsends = nsends; 2722 redund->nrecvs = nrecvs; 2723 redund->send_rank = send_rank; 2724 redund->recv_rank = recv_rank; 2725 redund->sbuf_nz = sbuf_nz; 2726 redund->rbuf_nz = rbuf_nz; 2727 redund->sbuf_j = sbuf_j; 2728 redund->sbuf_a = sbuf_a; 2729 redund->rbuf_j = rbuf_j; 2730 redund->rbuf_a = rbuf_a; 2731 redund->psubcomm = NULL; 2732 2733 redund->Destroy = C->ops->destroy; 2734 C->ops->destroy = MatDestroy_MatRedundant; 2735 } 2736 PetscFunctionReturn(0); 2737 } 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2741 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2742 { 2743 PetscErrorCode ierr; 2744 MPI_Comm comm; 2745 PetscMPIInt size,subsize; 2746 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2747 Mat_Redundant *redund=NULL; 2748 PetscSubcomm psubcomm=NULL; 2749 MPI_Comm subcomm_in=subcomm; 2750 Mat *matseq; 2751 IS isrow,iscol; 2752 2753 PetscFunctionBegin; 2754 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2755 if (reuse == MAT_INITIAL_MATRIX) { 2756 /* create psubcomm, then get subcomm */ 2757 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2758 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2759 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2760 2761 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2762 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2763 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2764 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2765 subcomm = psubcomm->comm; 2766 } else { /* retrieve psubcomm and subcomm */ 2767 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2768 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2769 if (subsize == 1) { 2770 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2771 redund = c->redundant; 2772 } else { 2773 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2774 redund = c->redundant; 2775 } 2776 psubcomm = redund->psubcomm; 2777 } 2778 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2779 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2780 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2781 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2782 if (subsize == 1) { 2783 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2784 c->redundant->psubcomm = psubcomm; 2785 } else { 2786 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2787 c->redundant->psubcomm = psubcomm ; 2788 } 2789 } 2790 PetscFunctionReturn(0); 2791 } 2792 } 2793 2794 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2795 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2796 if (reuse == MAT_INITIAL_MATRIX) { 2797 /* create a local sequential matrix matseq[0] */ 2798 mloc_sub = PETSC_DECIDE; 2799 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2800 ierr = 
MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2801 rstart = rend - mloc_sub; 2802 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2803 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2804 } else { /* reuse == MAT_REUSE_MATRIX */ 2805 if (subsize == 1) { 2806 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2807 redund = c->redundant; 2808 } else { 2809 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2810 redund = c->redundant; 2811 } 2812 2813 isrow = redund->isrow; 2814 iscol = redund->iscol; 2815 matseq = redund->matseq; 2816 } 2817 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2818 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2819 2820 if (reuse == MAT_INITIAL_MATRIX) { 2821 /* create a supporting struct and attach it to C for reuse */ 2822 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2823 if (subsize == 1) { 2824 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2825 c->redundant = redund; 2826 } else { 2827 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2828 c->redundant = redund; 2829 } 2830 redund->isrow = isrow; 2831 redund->iscol = iscol; 2832 redund->matseq = matseq; 2833 redund->psubcomm = psubcomm; 2834 redund->Destroy = (*matredundant)->ops->destroy; 2835 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2836 } 2837 PetscFunctionReturn(0); 2838 } 2839 2840 #undef __FUNCT__ 2841 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2842 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2843 { 2844 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2845 PetscErrorCode ierr; 2846 PetscInt i,*idxb = 0; 2847 PetscScalar *va,*vb; 2848 Vec vtmp; 2849 2850 PetscFunctionBegin; 2851 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2852 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2853 if (idx) { 2854 for (i=0; i<A->rmap->n; i++) { 2855 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2856 } 2857 } 2858 2859 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2860 if (idx) { 2861 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2862 } 2863 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2864 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2865 2866 for (i=0; i<A->rmap->n; i++) { 2867 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2868 va[i] = vb[i]; 2869 if (idx) idx[i] = a->garray[idxb[i]]; 2870 } 2871 } 2872 2873 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2874 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2875 ierr = PetscFree(idxb);CHKERRQ(ierr); 2876 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2877 PetscFunctionReturn(0); 2878 } 2879 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2882 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2883 { 2884 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2885 PetscErrorCode ierr; 2886 PetscInt i,*idxb = 0; 2887 PetscScalar *va,*vb; 2888 Vec vtmp; 2889 2890 PetscFunctionBegin; 2891 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2892 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2893 if (idx) { 2894 for (i=0; i<A->cmap->n; i++) { 2895 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2896 } 2897 } 2898 2899 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2900 if (idx) { 2901 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2902 } 2903 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2904 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2905 2906 for (i=0; i<A->rmap->n; i++) 
{ 2907 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2908 va[i] = vb[i]; 2909 if (idx) idx[i] = a->garray[idxb[i]]; 2910 } 2911 } 2912 2913 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2914 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2915 ierr = PetscFree(idxb);CHKERRQ(ierr); 2916 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2917 PetscFunctionReturn(0); 2918 } 2919 2920 #undef __FUNCT__ 2921 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2922 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2923 { 2924 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2925 PetscInt n = A->rmap->n; 2926 PetscInt cstart = A->cmap->rstart; 2927 PetscInt *cmap = mat->garray; 2928 PetscInt *diagIdx, *offdiagIdx; 2929 Vec diagV, offdiagV; 2930 PetscScalar *a, *diagA, *offdiagA; 2931 PetscInt r; 2932 PetscErrorCode ierr; 2933 2934 PetscFunctionBegin; 2935 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2936 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2937 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2938 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2939 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2940 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2941 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2942 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2943 for (r = 0; r < n; ++r) { 2944 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2945 a[r] = diagA[r]; 2946 idx[r] = cstart + diagIdx[r]; 2947 } else { 2948 a[r] = offdiagA[r]; 2949 idx[r] = cmap[offdiagIdx[r]]; 2950 } 2951 } 2952 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2953 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2954 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2955 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2956 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2957 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2958 PetscFunctionReturn(0); 2959 } 2960 2961 #undef __FUNCT__ 2962 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2963 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2964 { 2965 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2966 PetscInt n = A->rmap->n; 2967 PetscInt cstart = A->cmap->rstart; 2968 PetscInt *cmap = mat->garray; 2969 PetscInt *diagIdx, *offdiagIdx; 2970 Vec diagV, offdiagV; 2971 PetscScalar *a, *diagA, *offdiagA; 2972 PetscInt r; 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2977 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2978 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2979 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2980 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2981 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2982 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2983 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2984 for (r = 0; r < n; ++r) { 2985 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2986 a[r] = diagA[r]; 2987 idx[r] = cstart + diagIdx[r]; 2988 } else { 2989 a[r] = offdiagA[r]; 2990 idx[r] = cmap[offdiagIdx[r]]; 2991 } 2992 } 2993 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2994 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2995 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2996 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2997 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2998 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2999 
PetscFunctionReturn(0); 3000 } 3001 3002 #undef __FUNCT__ 3003 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3004 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3005 { 3006 PetscErrorCode ierr; 3007 Mat *dummy; 3008 3009 PetscFunctionBegin; 3010 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3011 *newmat = *dummy; 3012 ierr = PetscFree(dummy);CHKERRQ(ierr); 3013 PetscFunctionReturn(0); 3014 } 3015 3016 #undef __FUNCT__ 3017 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3018 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3019 { 3020 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3021 PetscErrorCode ierr; 3022 3023 PetscFunctionBegin; 3024 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 #undef __FUNCT__ 3029 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3030 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3031 { 3032 PetscErrorCode ierr; 3033 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3034 3035 PetscFunctionBegin; 3036 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3037 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3038 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3039 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3040 PetscFunctionReturn(0); 3041 } 3042 3043 /* -------------------------------------------------------------------*/ 3044 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3045 MatGetRow_MPIAIJ, 3046 MatRestoreRow_MPIAIJ, 3047 MatMult_MPIAIJ, 3048 /* 4*/ MatMultAdd_MPIAIJ, 3049 MatMultTranspose_MPIAIJ, 3050 MatMultTransposeAdd_MPIAIJ, 3051 #if defined(PETSC_HAVE_PBGL) 3052 MatSolve_MPIAIJ, 3053 #else 3054 0, 3055 #endif 3056 0, 3057 0, 3058 /*10*/ 0, 3059 0, 3060 0, 3061 MatSOR_MPIAIJ, 3062 MatTranspose_MPIAIJ, 3063 /*15*/ MatGetInfo_MPIAIJ, 3064 MatEqual_MPIAIJ, 3065 MatGetDiagonal_MPIAIJ, 3066 MatDiagonalScale_MPIAIJ, 3067 MatNorm_MPIAIJ, 3068 /*20*/ MatAssemblyBegin_MPIAIJ, 3069 MatAssemblyEnd_MPIAIJ, 3070 MatSetOption_MPIAIJ, 3071 MatZeroEntries_MPIAIJ, 3072 /*24*/ MatZeroRows_MPIAIJ, 3073 0, 3074 #if defined(PETSC_HAVE_PBGL) 3075 0, 3076 #else 3077 0, 3078 #endif 3079 0, 3080 0, 3081 /*29*/ MatSetUp_MPIAIJ, 3082 #if defined(PETSC_HAVE_PBGL) 3083 0, 3084 #else 3085 0, 3086 #endif 3087 0, 3088 0, 3089 0, 3090 /*34*/ MatDuplicate_MPIAIJ, 3091 0, 3092 0, 3093 0, 3094 0, 3095 /*39*/ MatAXPY_MPIAIJ, 3096 MatGetSubMatrices_MPIAIJ, 3097 MatIncreaseOverlap_MPIAIJ, 3098 MatGetValues_MPIAIJ, 3099 MatCopy_MPIAIJ, 3100 /*44*/ MatGetRowMax_MPIAIJ, 3101 MatScale_MPIAIJ, 3102 0, 3103 0, 3104 MatZeroRowsColumns_MPIAIJ, 3105 /*49*/ MatSetRandom_MPIAIJ, 3106 0, 3107 0, 3108 0, 3109 0, 3110 /*54*/ MatFDColoringCreate_MPIXAIJ, 3111 0, 3112 MatSetUnfactored_MPIAIJ, 3113 MatPermute_MPIAIJ, 3114 0, 3115 /*59*/ MatGetSubMatrix_MPIAIJ, 3116 MatDestroy_MPIAIJ, 3117 MatView_MPIAIJ, 3118 0, 3119 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3120 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3121 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3122 0, 3123 0, 3124 0, 3125 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3126 MatGetRowMinAbs_MPIAIJ, 3127 0, 3128 MatSetColoring_MPIAIJ, 3129 0, 3130 MatSetValuesAdifor_MPIAIJ, 3131 /*75*/ MatFDColoringApply_AIJ, 3132 0, 3133 0, 3134 0, 3135 MatFindZeroDiagonals_MPIAIJ, 3136 /*80*/ 0, 3137 0, 3138 0, 3139 /*83*/ MatLoad_MPIAIJ, 3140 0, 3141 0, 3142 0, 3143 0, 3144 0, 3145 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3146 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3147 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3148 MatPtAP_MPIAIJ_MPIAIJ, 3149 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3150 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3151 0, 3152 0, 3153 0, 3154 0, 3155 /*99*/ 0, 3156 0, 3157 0, 3158 MatConjugate_MPIAIJ, 3159 0, 3160 /*104*/MatSetValuesRow_MPIAIJ, 3161 MatRealPart_MPIAIJ, 3162 MatImaginaryPart_MPIAIJ, 3163 0, 3164 0, 3165 /*109*/0, 3166 MatGetRedundantMatrix_MPIAIJ, 3167 MatGetRowMin_MPIAIJ, 3168 0, 3169 0, 3170 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3171 0, 3172 0, 3173 0, 3174 0, 3175 /*119*/0, 3176 0, 3177 0, 3178 0, 3179 MatGetMultiProcBlock_MPIAIJ, 3180 /*124*/MatFindNonzeroRows_MPIAIJ, 3181 MatGetColumnNorms_MPIAIJ, 3182 MatInvertBlockDiagonal_MPIAIJ, 3183 0, 3184 MatGetSubMatricesParallel_MPIAIJ, 3185 /*129*/0, 3186 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3187 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3188 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3189 0, 3190 /*134*/0, 3191 0, 3192 0, 3193 0, 3194 0, 3195 /*139*/0, 3196 0, 3197 0, 3198 MatFDColoringSetUp_MPIXAIJ 3199 }; 3200 3201 /* ----------------------------------------------------------------------------------------*/ 3202 3203 #undef __FUNCT__ 3204 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3205 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3206 { 3207 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3208 PetscErrorCode ierr; 3209 3210 PetscFunctionBegin; 3211 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3212 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3213 PetscFunctionReturn(0); 3214 } 3215 3216 #undef __FUNCT__ 3217 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3218 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3219 { 3220 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3221 PetscErrorCode ierr; 3222 3223 PetscFunctionBegin; 3224 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3225 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3226 PetscFunctionReturn(0); 3227 } 3228 3229 #undef __FUNCT__ 3230 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3231 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3232 { 3233 Mat_MPIAIJ *b; 3234 PetscErrorCode ierr; 3235 3236 PetscFunctionBegin; 3237 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3238 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3239 b = (Mat_MPIAIJ*)B->data; 3240 3241 if (!B->preallocated) { 3242 /* Explicitly create 2 MATSEQAIJ matrices. 
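        b->A is the "diagonal" block (local rows by locally owned columns) and b->B is the
        "off-diagonal" block; b->B is created with the full global number of columns, and its
        column space is compacted to just the columns actually used (their global indices are
        kept in garray) when the matrix is assembled.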
*/ 3243 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3244 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3245 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3246 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3247 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3248 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3249 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3250 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3251 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3252 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3253 } 3254 3255 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3256 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3257 B->preallocated = PETSC_TRUE; 3258 PetscFunctionReturn(0); 3259 } 3260 3261 #undef __FUNCT__ 3262 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3263 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3264 { 3265 Mat mat; 3266 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3267 PetscErrorCode ierr; 3268 3269 PetscFunctionBegin; 3270 *newmat = 0; 3271 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3272 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3273 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3274 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3275 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3276 a = (Mat_MPIAIJ*)mat->data; 3277 3278 mat->factortype = matin->factortype; 3279 mat->rmap->bs = matin->rmap->bs; 3280 mat->cmap->bs = matin->cmap->bs; 3281 mat->assembled = PETSC_TRUE; 3282 mat->insertmode = NOT_SET_VALUES; 3283 mat->preallocated = PETSC_TRUE; 3284 3285 a->size = oldmat->size; 3286 a->rank = oldmat->rank; 3287 a->donotstash = oldmat->donotstash; 3288 a->roworiented = oldmat->roworiented; 3289 a->rowindices = 0; 3290 a->rowvalues = 0; 3291 a->getrowactive = PETSC_FALSE; 3292 3293 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3294 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3295 3296 if (oldmat->colmap) { 3297 #if defined(PETSC_USE_CTABLE) 3298 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3299 #else 3300 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3301 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3302 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3303 #endif 3304 } else a->colmap = 0; 3305 if (oldmat->garray) { 3306 PetscInt len; 3307 len = oldmat->B->cmap->n; 3308 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3309 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3310 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3311 } else a->garray = 0; 3312 3313 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3314 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3315 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3316 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3317 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3318 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3319 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3320 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3321 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3322 *newmat = mat; 3323 PetscFunctionReturn(0); 3324 } 3325 3326 3327 3328 #undef __FUNCT__ 3329 #define __FUNCT__ "MatLoad_MPIAIJ" 3330 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3331 { 3332 PetscScalar *vals,*svals; 3333 MPI_Comm comm; 3334 PetscErrorCode ierr; 3335 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3336 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3337 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3338 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3339 PetscInt cend,cstart,n,*rowners,sizesset=1; 3340 int fd; 3341 PetscInt bs = 1; 3342 3343 PetscFunctionBegin; 3344 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3345 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3346 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3347 if (!rank) { 3348 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3349 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3350 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3351 } 3352 3353 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3354 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3355 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3356 3357 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3358 3359 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3360 M = header[1]; N = header[2]; 3361 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3362 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3363 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3364 3365 /* If global sizes are set, check if they are consistent with that given in the file */ 3366 if (sizesset) { 3367 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3368 } 3369 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3370 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3371 3372 /* determine ownership of all (block) rows */ 3373 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3374 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3375 else m = newMat->rmap->n; /* Set by user */ 3376 3377 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3378 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3379 3380 /* First process needs enough room for process with most rows */ 3381 if (!rank) { 3382 mmax = rowners[1]; 3383 for (i=2; i<=size; i++) { 3384 mmax = PetscMax(mmax, rowners[i]); 3385 } 3386 } else mmax = -1; /* unused, but compilers complain */ 3387 3388 rowners[0] = 0; 3389 for (i=2; i<=size; i++) { 3390 rowners[i] += rowners[i-1]; 3391 } 3392 
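  /* rowners[] now holds cumulative counts: process p owns global rows rowners[p] up to rowners[p+1]-1 */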
rstart = rowners[rank]; 3393 rend = rowners[rank+1]; 3394 3395 /* distribute row lengths to all processors */ 3396 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3397 if (!rank) { 3398 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3399 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3400 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3401 for (j=0; j<m; j++) { 3402 procsnz[0] += ourlens[j]; 3403 } 3404 for (i=1; i<size; i++) { 3405 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3406 /* calculate the number of nonzeros on each processor */ 3407 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3408 procsnz[i] += rowlengths[j]; 3409 } 3410 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3411 } 3412 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3413 } else { 3414 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3415 } 3416 3417 if (!rank) { 3418 /* determine max buffer needed and allocate it */ 3419 maxnz = 0; 3420 for (i=0; i<size; i++) { 3421 maxnz = PetscMax(maxnz,procsnz[i]); 3422 } 3423 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3424 3425 /* read in my part of the matrix column indices */ 3426 nz = procsnz[0]; 3427 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3428 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3429 3430 /* read in every one elses and ship off */ 3431 for (i=1; i<size; i++) { 3432 nz = procsnz[i]; 3433 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3434 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3435 } 3436 ierr = PetscFree(cols);CHKERRQ(ierr); 3437 } else { 3438 /* determine buffer space needed for message */ 3439 nz = 0; 3440 for (i=0; i<m; i++) { 3441 nz += ourlens[i]; 3442 } 3443 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3444 3445 /* receive message of column indices*/ 3446 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3447 } 3448 3449 /* determine column ownership if matrix is not square */ 3450 if (N != M) { 3451 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3452 else n = newMat->cmap->n; 3453 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3454 cstart = cend - n; 3455 } else { 3456 cstart = rstart; 3457 cend = rend; 3458 n = cend - cstart; 3459 } 3460 3461 /* loop over local rows, determining number of off diagonal entries */ 3462 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3463 jj = 0; 3464 for (i=0; i<m; i++) { 3465 for (j=0; j<ourlens[i]; j++) { 3466 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3467 jj++; 3468 } 3469 } 3470 3471 for (i=0; i<m; i++) { 3472 ourlens[i] -= offlens[i]; 3473 } 3474 if (!sizesset) { 3475 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3476 } 3477 3478 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3479 3480 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3481 3482 for (i=0; i<m; i++) { 3483 ourlens[i] += offlens[i]; 3484 } 3485 3486 if (!rank) { 3487 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3488 3489 /* read in my part of the matrix numerical values */ 3490 nz = procsnz[0]; 3491 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3492 3493 /* insert into matrix */ 3494 jj = rstart; 3495 smycols = mycols; 3496 svals = vals; 3497 for (i=0; i<m; i++) { 3498 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3499 smycols += ourlens[i]; 3500 svals += 
ourlens[i]; 3501 jj++; 3502 } 3503 3504 /* read in other processors and ship out */ 3505 for (i=1; i<size; i++) { 3506 nz = procsnz[i]; 3507 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3508 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3509 } 3510 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3511 } else { 3512 /* receive numeric values */ 3513 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3514 3515 /* receive message of values*/ 3516 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3517 3518 /* insert into matrix */ 3519 jj = rstart; 3520 smycols = mycols; 3521 svals = vals; 3522 for (i=0; i<m; i++) { 3523 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3524 smycols += ourlens[i]; 3525 svals += ourlens[i]; 3526 jj++; 3527 } 3528 } 3529 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3530 ierr = PetscFree(vals);CHKERRQ(ierr); 3531 ierr = PetscFree(mycols);CHKERRQ(ierr); 3532 ierr = PetscFree(rowners);CHKERRQ(ierr); 3533 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3534 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3535 PetscFunctionReturn(0); 3536 } 3537 3538 #undef __FUNCT__ 3539 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3540 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3541 { 3542 PetscErrorCode ierr; 3543 IS iscol_local; 3544 PetscInt csize; 3545 3546 PetscFunctionBegin; 3547 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3548 if (call == MAT_REUSE_MATRIX) { 3549 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3550 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3551 } else { 3552 PetscInt cbs; 3553 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3554 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3555 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3556 } 3557 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3558 if (call == MAT_INITIAL_MATRIX) { 3559 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3560 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3561 } 3562 PetscFunctionReturn(0); 3563 } 3564 3565 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3566 #undef __FUNCT__ 3567 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3568 /* 3569 Not great since it makes two copies of the submatrix, first an SeqAIJ 3570 in local and then by concatenating the local matrices the end result. 3571 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3572 3573 Note: This requires a sequential iscol with all indices. 
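   (MatGetSubMatrix_MPIAIJ() above builds such an index set with ISAllGather() before calling this routine.)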
3574 */ 3575 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3576 { 3577 PetscErrorCode ierr; 3578 PetscMPIInt rank,size; 3579 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3580 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3581 PetscBool allcolumns, colflag; 3582 Mat M,Mreuse; 3583 MatScalar *vwork,*aa; 3584 MPI_Comm comm; 3585 Mat_SeqAIJ *aij; 3586 3587 PetscFunctionBegin; 3588 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3589 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3590 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3591 3592 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3593 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3594 if (colflag && ncol == mat->cmap->N) { 3595 allcolumns = PETSC_TRUE; 3596 } else { 3597 allcolumns = PETSC_FALSE; 3598 } 3599 if (call == MAT_REUSE_MATRIX) { 3600 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3601 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3602 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3603 } else { 3604 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3605 } 3606 3607 /* 3608 m - number of local rows 3609 n - number of columns (same on all processors) 3610 rstart - first row in new global matrix generated 3611 */ 3612 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3613 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3614 if (call == MAT_INITIAL_MATRIX) { 3615 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3616 ii = aij->i; 3617 jj = aij->j; 3618 3619 /* 3620 Determine the number of non-zeros in the diagonal and off-diagonal 3621 portions of the matrix in order to do correct preallocation 3622 */ 3623 3624 /* first get start and end of "diagonal" columns */ 3625 if (csize == PETSC_DECIDE) { 3626 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3627 if (mglobal == n) { /* square matrix */ 3628 nlocal = m; 3629 } else { 3630 nlocal = n/size + ((n % size) > rank); 3631 } 3632 } else { 3633 nlocal = csize; 3634 } 3635 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3636 rstart = rend - nlocal; 3637 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3638 3639 /* next, compute all the lengths */ 3640 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3641 olens = dlens + m; 3642 for (i=0; i<m; i++) { 3643 jend = ii[i+1] - ii[i]; 3644 olen = 0; 3645 dlen = 0; 3646 for (j=0; j<jend; j++) { 3647 if (*jj < rstart || *jj >= rend) olen++; 3648 else dlen++; 3649 jj++; 3650 } 3651 olens[i] = olen; 3652 dlens[i] = dlen; 3653 } 3654 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3655 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3656 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3657 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3658 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3659 ierr = PetscFree(dlens);CHKERRQ(ierr); 3660 } else { 3661 PetscInt ml,nl; 3662 3663 M = *newmat; 3664 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3665 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3666 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3667 /* 3668 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3669 rather than the slower MatSetValues(). 3670 */ 3671 M->was_assembled = PETSC_TRUE; 3672 M->assembled = PETSC_FALSE; 3673 } 3674 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3675 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3676 ii = aij->i; 3677 jj = aij->j; 3678 aa = aij->a; 3679 for (i=0; i<m; i++) { 3680 row = rstart + i; 3681 nz = ii[i+1] - ii[i]; 3682 cwork = jj; jj += nz; 3683 vwork = aa; aa += nz; 3684 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3685 } 3686 3687 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3688 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3689 *newmat = M; 3690 3691 /* save submatrix used in processor for next request */ 3692 if (call == MAT_INITIAL_MATRIX) { 3693 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3694 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3695 } 3696 PetscFunctionReturn(0); 3697 } 3698 3699 #undef __FUNCT__ 3700 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3701 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3702 { 3703 PetscInt m,cstart, cend,j,nnz,i,d; 3704 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3705 const PetscInt *JJ; 3706 PetscScalar *values; 3707 PetscErrorCode ierr; 3708 3709 PetscFunctionBegin; 3710 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3711 3712 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3713 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3714 m = B->rmap->n; 3715 cstart = B->cmap->rstart; 3716 cend = B->cmap->rend; 3717 rstart = B->rmap->rstart; 3718 3719 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3720 3721 #if defined(PETSC_USE_DEBUGGING) 3722 for (i=0; i<m; i++) { 3723 nnz = Ii[i+1]- Ii[i]; 3724 JJ = J + Ii[i]; 3725 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3726 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3727 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3728 } 3729 #endif 3730 3731 for (i=0; i<m; i++) { 3732 nnz = Ii[i+1]- Ii[i]; 3733 JJ = J + Ii[i]; 3734 nnz_max = PetscMax(nnz_max,nnz); 3735 d = 0; 3736 for (j=0; j<nnz; j++) { 3737 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3738 } 3739 d_nnz[i] = d; 3740 o_nnz[i] = nnz - d; 3741 } 3742 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3743 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3744 3745 if (v) values = (PetscScalar*)v; 3746 else { 3747 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3748 } 3749 3750 for (i=0; i<m; i++) { 3751 ii = i + rstart; 3752 nnz = Ii[i+1]- Ii[i]; 3753 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3754 } 3755 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3756 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3757 3758 if (!v) { 3759 ierr = PetscFree(values);CHKERRQ(ierr); 3760 } 3761 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3762 PetscFunctionReturn(0); 3763 } 3764 3765 #undef __FUNCT__ 3766 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3767 /*@ 3768 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3769 (the default parallel PETSc format). 3770 3771 Collective on MPI_Comm 3772 3773 Input Parameters: 3774 + B - the matrix 3775 . i - the indices into j for the start of each local row (starts with zero) 3776 . j - the column indices for each local row (starts with zero) 3777 - v - optional values in the matrix 3778 3779 Level: developer 3780 3781 Notes: 3782 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3783 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3784 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3785 3786 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3787 3788 The format which is used for the sparse matrix input, is equivalent to a 3789 row-major ordering.. i.e for the following matrix, the input data expected is 3790 as shown: 3791 3792 1 0 0 3793 2 0 3 P0 3794 ------- 3795 4 5 6 P1 3796 3797 Process0 [P0]: rows_owned=[0,1] 3798 i = {0,1,3} [size = nrow+1 = 2+1] 3799 j = {0,0,2} [size = nz = 6] 3800 v = {1,2,3} [size = nz = 6] 3801 3802 Process1 [P1]: rows_owned=[2] 3803 i = {0,3} [size = nrow+1 = 1+1] 3804 j = {0,1,2} [size = nz = 6] 3805 v = {4,5,6} [size = nz = 6] 3806 3807 .keywords: matrix, aij, compressed row, sparse, parallel 3808 3809 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3810 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3811 @*/ 3812 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3813 { 3814 PetscErrorCode ierr; 3815 3816 PetscFunctionBegin; 3817 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3818 PetscFunctionReturn(0); 3819 } 3820 3821 #undef __FUNCT__ 3822 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3823 /*@C 3824 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3825 (the default parallel PETSc format). For good matrix assembly performance 3826 the user should preallocate the matrix storage by setting the parameters 3827 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3828 performance can be increased by more than a factor of 50. 3829 3830 Collective on MPI_Comm 3831 3832 Input Parameters: 3833 + A - the matrix 3834 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3835 (same value is used for all local rows) 3836 . d_nnz - array containing the number of nonzeros in the various rows of the 3837 DIAGONAL portion of the local submatrix (possibly different for each row) 3838 or NULL, if d_nz is used to specify the nonzero structure. 3839 The size of this array is equal to the number of local rows, i.e 'm'. 
3840 For matrices that will be factored, you must leave room for (and set) 3841 the diagonal entry even if it is zero. 3842 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3843 submatrix (same value is used for all local rows). 3844 - o_nnz - array containing the number of nonzeros in the various rows of the 3845 OFF-DIAGONAL portion of the local submatrix (possibly different for 3846 each row) or NULL, if o_nz is used to specify the nonzero 3847 structure. The size of this array is equal to the number 3848 of local rows, i.e 'm'. 3849 3850 If the *_nnz parameter is given then the *_nz parameter is ignored 3851 3852 The AIJ format (also called the Yale sparse matrix format or 3853 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3854 storage. The stored row and column indices begin with zero. 3855 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3856 3857 The parallel matrix is partitioned such that the first m0 rows belong to 3858 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3859 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3860 3861 The DIAGONAL portion of the local submatrix of a processor can be defined 3862 as the submatrix which is obtained by extraction the part corresponding to 3863 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3864 first row that belongs to the processor, r2 is the last row belonging to 3865 the this processor, and c1-c2 is range of indices of the local part of a 3866 vector suitable for applying the matrix to. This is an mxn matrix. In the 3867 common case of a square matrix, the row and column ranges are the same and 3868 the DIAGONAL part is also square. The remaining portion of the local 3869 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3870 3871 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3872 3873 You can call MatGetInfo() to get information on how effective the preallocation was; 3874 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3875 You can also run with the option -info and look for messages with the string 3876 malloc in them to see if additional memory allocation was needed. 3877 3878 Example usage: 3879 3880 Consider the following 8x8 matrix with 34 non-zero values, that is 3881 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3882 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3883 as follows: 3884 3885 .vb 3886 1 2 0 | 0 3 0 | 0 4 3887 Proc0 0 5 6 | 7 0 0 | 8 0 3888 9 0 10 | 11 0 0 | 12 0 3889 ------------------------------------- 3890 13 0 14 | 15 16 17 | 0 0 3891 Proc1 0 18 0 | 19 20 21 | 0 0 3892 0 0 0 | 22 23 0 | 24 0 3893 ------------------------------------- 3894 Proc2 25 26 27 | 0 0 28 | 29 0 3895 30 0 0 | 31 32 33 | 0 34 3896 .ve 3897 3898 This can be represented as a collection of submatrices as: 3899 3900 .vb 3901 A B C 3902 D E F 3903 G H I 3904 .ve 3905 3906 Where the submatrices A,B,C are owned by proc0, D,E,F are 3907 owned by proc1, G,H,I are owned by proc2. 3908 3909 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3910 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3911 The 'M','N' parameters are 8,8, and have the same values on all procs. 3912 3913 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3914 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3915 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3916 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3917 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3918 matrix, ans [DF] as another SeqAIJ matrix. 3919 3920 When d_nz, o_nz parameters are specified, d_nz storage elements are 3921 allocated for every row of the local diagonal submatrix, and o_nz 3922 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3923 One way to choose d_nz and o_nz is to use the max nonzerors per local 3924 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3925 In this case, the values of d_nz,o_nz are: 3926 .vb 3927 proc0 : dnz = 2, o_nz = 2 3928 proc1 : dnz = 3, o_nz = 2 3929 proc2 : dnz = 1, o_nz = 4 3930 .ve 3931 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3932 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3933 for proc3. i.e we are using 12+15+10=37 storage locations to store 3934 34 values. 3935 3936 When d_nnz, o_nnz parameters are specified, the storage is specified 3937 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3938 In the above case the values for d_nnz,o_nnz are: 3939 .vb 3940 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3941 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3942 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3943 .ve 3944 Here the space allocated is sum of all the above values i.e 34, and 3945 hence pre-allocation is perfect. 3946 3947 Level: intermediate 3948 3949 .keywords: matrix, aij, compressed row, sparse, parallel 3950 3951 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3952 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3953 @*/ 3954 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3955 { 3956 PetscErrorCode ierr; 3957 3958 PetscFunctionBegin; 3959 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3960 PetscValidType(B,1); 3961 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3962 PetscFunctionReturn(0); 3963 } 3964 3965 #undef __FUNCT__ 3966 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3967 /*@ 3968 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3969 CSR format the local rows. 3970 3971 Collective on MPI_Comm 3972 3973 Input Parameters: 3974 + comm - MPI communicator 3975 . m - number of local rows (Cannot be PETSC_DECIDE) 3976 . n - This value should be the same as the local size used in creating the 3977 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3978 calculated if N is given) For square matrices n is almost always m. 3979 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3980 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3981 . i - row indices 3982 . j - column indices 3983 - a - matrix values 3984 3985 Output Parameter: 3986 . mat - the matrix 3987 3988 Level: intermediate 3989 3990 Notes: 3991 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3992 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3993 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
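   For instance, for the 3x3 example matrix shown below, process 0 (which owns rows 0 and 1)
   could create the matrix with the following minimal sketch (PETSC_COMM_WORLD assumed, error
   checking omitted); i[] holds the start of each local row, j[] the global column indices and
   v[] the values, stored row by row:
.vb
   Mat         A;
   PetscInt    i[] = {0,1,3};
   PetscInt    j[] = {0,0,2};
   PetscScalar v[] = {1.0,2.0,3.0};
   ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve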
3994 3995 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3996 3997 The format which is used for the sparse matrix input, is equivalent to a 3998 row-major ordering.. i.e for the following matrix, the input data expected is 3999 as shown: 4000 4001 1 0 0 4002 2 0 3 P0 4003 ------- 4004 4 5 6 P1 4005 4006 Process0 [P0]: rows_owned=[0,1] 4007 i = {0,1,3} [size = nrow+1 = 2+1] 4008 j = {0,0,2} [size = nz = 6] 4009 v = {1,2,3} [size = nz = 6] 4010 4011 Process1 [P1]: rows_owned=[2] 4012 i = {0,3} [size = nrow+1 = 1+1] 4013 j = {0,1,2} [size = nz = 6] 4014 v = {4,5,6} [size = nz = 6] 4015 4016 .keywords: matrix, aij, compressed row, sparse, parallel 4017 4018 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4019 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4020 @*/ 4021 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4022 { 4023 PetscErrorCode ierr; 4024 4025 PetscFunctionBegin; 4026 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4027 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4028 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4029 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4030 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4031 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4032 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4033 PetscFunctionReturn(0); 4034 } 4035 4036 #undef __FUNCT__ 4037 #define __FUNCT__ "MatCreateAIJ" 4038 /*@C 4039 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4040 (the default parallel PETSc format). For good matrix assembly performance 4041 the user should preallocate the matrix storage by setting the parameters 4042 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4043 performance can be increased by more than a factor of 50. 4044 4045 Collective on MPI_Comm 4046 4047 Input Parameters: 4048 + comm - MPI communicator 4049 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4050 This value should be the same as the local size used in creating the 4051 y vector for the matrix-vector product y = Ax. 4052 . n - This value should be the same as the local size used in creating the 4053 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4054 calculated if N is given) For square matrices n is almost always m. 4055 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4056 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4057 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4058 (same value is used for all local rows) 4059 . d_nnz - array containing the number of nonzeros in the various rows of the 4060 DIAGONAL portion of the local submatrix (possibly different for each row) 4061 or NULL, if d_nz is used to specify the nonzero structure. 4062 The size of this array is equal to the number of local rows, i.e 'm'. 4063 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4064 submatrix (same value is used for all local rows). 
4065 - o_nnz - array containing the number of nonzeros in the various rows of the 4066 OFF-DIAGONAL portion of the local submatrix (possibly different for 4067 each row) or NULL, if o_nz is used to specify the nonzero 4068 structure. The size of this array is equal to the number 4069 of local rows, i.e 'm'. 4070 4071 Output Parameter: 4072 . A - the matrix 4073 4074 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4075 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4076 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4077 4078 Notes: 4079 If the *_nnz parameter is given then the *_nz parameter is ignored 4080 4081 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4082 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4083 storage requirements for this matrix. 4084 4085 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4086 processor than it must be used on all processors that share the object for 4087 that argument. 4088 4089 The user MUST specify either the local or global matrix dimensions 4090 (possibly both). 4091 4092 The parallel matrix is partitioned across processors such that the 4093 first m0 rows belong to process 0, the next m1 rows belong to 4094 process 1, the next m2 rows belong to process 2 etc.. where 4095 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4096 values corresponding to [m x N] submatrix. 4097 4098 The columns are logically partitioned with the n0 columns belonging 4099 to 0th partition, the next n1 columns belonging to the next 4100 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4101 4102 The DIAGONAL portion of the local submatrix on any given processor 4103 is the submatrix corresponding to the rows and columns m,n 4104 corresponding to the given processor. i.e diagonal matrix on 4105 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4106 etc. The remaining portion of the local submatrix [m x (N-n)] 4107 constitute the OFF-DIAGONAL portion. The example below better 4108 illustrates this concept. 4109 4110 For a square global matrix we define each processor's diagonal portion 4111 to be its local rows and the corresponding columns (a square submatrix); 4112 each processor's off-diagonal portion encompasses the remainder of the 4113 local matrix (a rectangular submatrix). 4114 4115 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4116 4117 When calling this routine with a single process communicator, a matrix of 4118 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4119 type of communicator, use the construction mechanism: 4120 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4121 4122 By default, this format uses inodes (identical nodes) when possible. 4123 We search for consecutive rows with the same nonzero structure, thereby 4124 reusing matrix information to achieve increased efficiency. 4125 4126 Options Database Keys: 4127 + -mat_no_inode - Do not use inodes 4128 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4129 - -mat_aij_oneindex - Internally use indexing starting at 1 4130 rather than 0. Note that when calling MatSetValues(), 4131 the user still MUST index entries starting at 0! 4132 4133 4134 Example usage: 4135 4136 Consider the following 8x8 matrix with 34 non-zero values, that is 4137 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4138 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4139 as follows: 4140 4141 .vb 4142 1 2 0 | 0 3 0 | 0 4 4143 Proc0 0 5 6 | 7 0 0 | 8 0 4144 9 0 10 | 11 0 0 | 12 0 4145 ------------------------------------- 4146 13 0 14 | 15 16 17 | 0 0 4147 Proc1 0 18 0 | 19 20 21 | 0 0 4148 0 0 0 | 22 23 0 | 24 0 4149 ------------------------------------- 4150 Proc2 25 26 27 | 0 0 28 | 29 0 4151 30 0 0 | 31 32 33 | 0 34 4152 .ve 4153 4154 This can be represented as a collection of submatrices as: 4155 4156 .vb 4157 A B C 4158 D E F 4159 G H I 4160 .ve 4161 4162 Where the submatrices A,B,C are owned by proc0, D,E,F are 4163 owned by proc1, G,H,I are owned by proc2. 4164 4165 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4166 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4167 The 'M','N' parameters are 8,8, and have the same values on all procs. 4168 4169 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4170 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4171 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4172 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4173 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4174 matrix, ans [DF] as another SeqAIJ matrix. 4175 4176 When d_nz, o_nz parameters are specified, d_nz storage elements are 4177 allocated for every row of the local diagonal submatrix, and o_nz 4178 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4179 One way to choose d_nz and o_nz is to use the max nonzerors per local 4180 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4181 In this case, the values of d_nz,o_nz are: 4182 .vb 4183 proc0 : dnz = 2, o_nz = 2 4184 proc1 : dnz = 3, o_nz = 2 4185 proc2 : dnz = 1, o_nz = 4 4186 .ve 4187 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4188 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4189 for proc3. i.e we are using 12+15+10=37 storage locations to store 4190 34 values. 4191 4192 When d_nnz, o_nnz parameters are specified, the storage is specified 4193 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4194 In the above case the values for d_nnz,o_nnz are: 4195 .vb 4196 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4197 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4198 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4199 .ve 4200 Here the space allocated is sum of all the above values i.e 34, and 4201 hence pre-allocation is perfect. 
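   As a minimal creation sketch for the example above, as seen from proc0 (PETSC_COMM_WORLD
   assumed, error checking omitted), the per-row counts given for d_nnz and o_nnz can be
   passed directly:
.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2};
   PetscInt o_nnz[] = {2,2,2};
   ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve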
4202 4203 Level: intermediate 4204 4205 .keywords: matrix, aij, compressed row, sparse, parallel 4206 4207 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4208 MPIAIJ, MatCreateMPIAIJWithArrays() 4209 @*/ 4210 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4211 { 4212 PetscErrorCode ierr; 4213 PetscMPIInt size; 4214 4215 PetscFunctionBegin; 4216 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4217 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4218 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4219 if (size > 1) { 4220 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4221 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4222 } else { 4223 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4224 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4225 } 4226 PetscFunctionReturn(0); 4227 } 4228 4229 #undef __FUNCT__ 4230 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4231 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4232 { 4233 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4234 4235 PetscFunctionBegin; 4236 *Ad = a->A; 4237 *Ao = a->B; 4238 *colmap = a->garray; 4239 PetscFunctionReturn(0); 4240 } 4241 4242 #undef __FUNCT__ 4243 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4244 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4245 { 4246 PetscErrorCode ierr; 4247 PetscInt i; 4248 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4249 4250 PetscFunctionBegin; 4251 if (coloring->ctype == IS_COLORING_GLOBAL) { 4252 ISColoringValue *allcolors,*colors; 4253 ISColoring ocoloring; 4254 4255 /* set coloring for diagonal portion */ 4256 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4257 4258 /* set coloring for off-diagonal portion */ 4259 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4260 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4261 for (i=0; i<a->B->cmap->n; i++) { 4262 colors[i] = allcolors[a->garray[i]]; 4263 } 4264 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4265 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4266 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4267 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4268 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4269 ISColoringValue *colors; 4270 PetscInt *larray; 4271 ISColoring ocoloring; 4272 4273 /* set coloring for diagonal portion */ 4274 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4275 for (i=0; i<a->A->cmap->n; i++) { 4276 larray[i] = i + A->cmap->rstart; 4277 } 4278 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4279 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4280 for (i=0; i<a->A->cmap->n; i++) { 4281 colors[i] = coloring->colors[larray[i]]; 4282 } 4283 ierr = PetscFree(larray);CHKERRQ(ierr); 4284 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4285 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4286 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4287 4288 /* set coloring for off-diagonal portion */ 4289 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4290 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4291 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4292 for (i=0; i<a->B->cmap->n; i++) { 4293 colors[i] = coloring->colors[larray[i]]; 4294 } 4295 ierr = PetscFree(larray);CHKERRQ(ierr); 4296 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4297 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4298 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4299 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4300 PetscFunctionReturn(0); 4301 } 4302 4303 #undef __FUNCT__ 4304 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4305 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4306 { 4307 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4308 PetscErrorCode ierr; 4309 4310 PetscFunctionBegin; 4311 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4312 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4313 PetscFunctionReturn(0); 4314 } 4315 4316 #undef __FUNCT__ 4317 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4318 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4319 { 4320 PetscErrorCode ierr; 4321 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4322 PetscInt *indx; 4323 4324 PetscFunctionBegin; 4325 /* This routine will ONLY return MPIAIJ type matrix */ 4326 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4327 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4328 if (n == PETSC_DECIDE) { 4329 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4330 } 4331 /* Check sum(n) = N */ 4332 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4333 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4334 4335 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4336 rstart -= m; 4337 4338 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4339 for (i=0; i<m; i++) { 4340 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4341 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4342 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4343 } 4344 4345 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4346 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4347 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4348 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4349 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4350 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4351 PetscFunctionReturn(0); 4352 } 4353 4354 #undef __FUNCT__ 4355 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4356 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4357 { 4358 PetscErrorCode ierr; 4359 PetscInt m,N,i,rstart,nnz,Ii; 4360 PetscInt *indx; 4361 PetscScalar *values; 4362 4363 PetscFunctionBegin; 4364 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4365 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4366 for (i=0; i<m; i++) { 4367 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4368 Ii = i + rstart; 4369 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4370 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4371 } 4372 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4373 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4374 PetscFunctionReturn(0); 4375 } 4376 4377 #undef __FUNCT__ 4378 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4379 /*@ 4380 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4381 matrices from each processor 4382 4383 Collective on MPI_Comm 4384 4385 Input Parameters: 4386 + comm - the communicators the parallel matrix will live on 4387 . inmat - the input sequential matrices 4388 . n - number of local columns (or PETSC_DECIDE) 4389 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4390 4391 Output Parameter: 4392 . outmat - the parallel matrix generated 4393 4394 Level: advanced 4395 4396 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4397 4398 @*/ 4399 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4400 { 4401 PetscErrorCode ierr; 4402 PetscMPIInt size; 4403 4404 PetscFunctionBegin; 4405 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4406 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4407 if (size == 1) { 4408 if (scall == MAT_INITIAL_MATRIX) { 4409 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4410 } else { 4411 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4412 } 4413 } else { 4414 if (scall == MAT_INITIAL_MATRIX) { 4415 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4416 } 4417 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4418 } 4419 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4420 PetscFunctionReturn(0); 4421 } 4422 4423 #undef __FUNCT__ 4424 #define __FUNCT__ "MatFileSplit" 4425 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4426 { 4427 PetscErrorCode ierr; 4428 PetscMPIInt rank; 4429 PetscInt m,N,i,rstart,nnz; 4430 size_t len; 4431 const PetscInt *indx; 4432 PetscViewer out; 4433 char *name; 4434 Mat B; 4435 const PetscScalar *values; 4436 4437 PetscFunctionBegin; 4438 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4439 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4440 /* Should this be the type of the diagonal block of A? 
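      (As written, each per-process piece B below is always created as MATSEQAIJ.)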
*/ 4441 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4442 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4443 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4444 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4445 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4446 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4447 for (i=0; i<m; i++) { 4448 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4449 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4450 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4451 } 4452 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4453 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4454 4455 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4456 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4457 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4458 sprintf(name,"%s.%d",outfile,rank); 4459 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4460 ierr = PetscFree(name);CHKERRQ(ierr); 4461 ierr = MatView(B,out);CHKERRQ(ierr); 4462 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4463 ierr = MatDestroy(&B);CHKERRQ(ierr); 4464 PetscFunctionReturn(0); 4465 } 4466 4467 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4468 #undef __FUNCT__ 4469 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4470 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4471 { 4472 PetscErrorCode ierr; 4473 Mat_Merge_SeqsToMPI *merge; 4474 PetscContainer container; 4475 4476 PetscFunctionBegin; 4477 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4478 if (container) { 4479 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4487 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4492 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4493 ierr = PetscFree(merge);CHKERRQ(ierr); 4494 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4495 } 4496 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4497 PetscFunctionReturn(0); 4498 } 4499 4500 #include <../src/mat/utils/freespace.h> 4501 #include <petscbt.h> 4502 4503 #undef __FUNCT__ 4504 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4505 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4506 { 4507 PetscErrorCode ierr; 4508 MPI_Comm comm; 4509 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4510 PetscMPIInt size,rank,taga,*len_s; 4511 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4512 PetscInt proc,m; 4513 PetscInt **buf_ri,**buf_rj; 4514 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4515 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4516 MPI_Request *s_waits,*r_waits; 4517 MPI_Status *status; 4518 MatScalar *aa=a->a; 4519 MatScalar **abuf_r,*ba_i; 4520 Mat_Merge_SeqsToMPI *merge; 4521 PetscContainer container; 4522 4523 PetscFunctionBegin; 4524 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4525 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4526 4527 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4528 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4529 4530 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4531 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4532 4533 bi = merge->bi; 4534 bj = merge->bj; 4535 buf_ri = merge->buf_ri; 4536 buf_rj = merge->buf_rj; 4537 4538 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4539 owners = merge->rowmap->range; 4540 len_s = merge->len_s; 4541 4542 /* send and recv matrix values */ 4543 /*-----------------------------*/ 4544 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4545 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4546 4547 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4548 for (proc=0,k=0; proc<size; proc++) { 4549 if (!len_s[proc]) continue; 4550 i = owners[proc]; 4551 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4552 k++; 4553 } 4554 4555 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4556 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4557 ierr = PetscFree(status);CHKERRQ(ierr); 4558 4559 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4560 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4561 4562 /* insert mat values of mpimat */ 4563 /*----------------------------*/ 4564 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4565 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4566 4567 for (k=0; k<merge->nrecv; k++) { 4568 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4569 nrows = *(buf_ri_k[k]); 4570 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4571 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4572 } 4573 4574 /* set values of ba */ 4575 m = merge->rowmap->n; 4576 for (i=0; i<m; i++) { 4577 arow = owners[rank] + i; 4578 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4579 bnzi = bi[i+1] - bi[i]; 4580 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4581 4582 /* add local non-zero vals of this proc's seqmat into ba */ 4583 anzi = ai[arow+1] - ai[arow]; 4584 aj = a->j + ai[arow]; 4585 aa = a->a + ai[arow]; 4586 nextaj = 0; 4587 for (j=0; nextaj<anzi; j++) { 4588 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4589 ba_i[j] += aa[nextaj++]; 4590 } 4591 } 4592 4593 /* add received vals into ba */ 4594 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4595 /* i-th row */ 4596 if (i == *nextrow[k]) { 4597 anzi = *(nextai[k]+1) - *nextai[k]; 4598 aj = buf_rj[k] + *(nextai[k]); 4599 aa = abuf_r[k] + *(nextai[k]); 4600 nextaj = 0; 4601 for (j=0; nextaj<anzi; j++) { 4602 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4603 ba_i[j] += aa[nextaj++]; 4604 } 4605 } 4606 nextrow[k]++; nextai[k]++; 4607 } 4608 } 4609 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4610 } 4611 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4612 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4613 4614 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4615 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4616 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4617 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4618 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4619 PetscFunctionReturn(0); 4620 } 4621 4622 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4623 4624 #undef __FUNCT__ 4625 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4626 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4627 { 4628 PetscErrorCode ierr; 4629 Mat B_mpi; 4630 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4631 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4632 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4633 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4634 PetscInt len,proc,*dnz,*onz,bs,cbs; 4635 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4636 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4637 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4638 MPI_Status *status; 4639 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4640 PetscBT lnkbt; 4641 Mat_Merge_SeqsToMPI *merge; 4642 PetscContainer container; 4643 4644 PetscFunctionBegin; 4645 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4646 4647 /* make sure it is a PETSc comm */ 4648 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4649 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4650 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4651 4652 ierr = PetscNew(&merge);CHKERRQ(ierr); 4653 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4654 4655 /* determine row ownership */ 4656 /*---------------------------------------------------------*/ 4657 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4658 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4659 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4660 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4661 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4662 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4663 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4664 4665 m = merge->rowmap->n; 4666 owners = merge->rowmap->range; 4667 4668 /* determine the number of messages to send, their lengths */ 4669 /*---------------------------------------------------------*/ 4670 len_s = merge->len_s; 4671 4672 len = 0; /* length of buf_si[] */ 4673 merge->nsend = 0; 4674 for (proc=0; proc<size; proc++) { 4675 len_si[proc] = 0; 4676 if (proc == rank) { 4677 len_s[proc] = 0; 4678 } else { 4679 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4680 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4681 } 4682 if (len_s[proc]) { 4683 merge->nsend++; 4684 nrows = 0; 4685 for (i=owners[proc]; i<owners[proc+1]; i++) { 4686 if (ai[i+1] > ai[i]) nrows++; 4687 } 4688 len_si[proc] = 2*(nrows+1); 4689 len += len_si[proc]; 4690 } 4691 } 4692 4693 /* determine the number and length of messages to receive for ij-structure */ 4694 /*-------------------------------------------------------------------------*/ 4695 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4696 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4697 4698 /* post the Irecv of j-structure */ 4699 /*-------------------------------*/ 4700 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4701 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4702 4703 /* post the Isend of j-structure */ 4704 /*--------------------------------*/ 4705 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4706 4707 for (proc=0, k=0; proc<size; proc++) { 4708 if (!len_s[proc]) continue; 4709 i = owners[proc]; 4710 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4711 k++; 4712 } 4713 4714 /* receives and sends of j-structure are complete */ 4715 /*------------------------------------------------*/ 4716 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4717 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4718 4719 /* send and recv i-structure */ 4720 /*---------------------------*/ 4721 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4722 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4723 4724 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4725 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4726 for (proc=0,k=0; proc<size; proc++) { 4727 if (!len_s[proc]) continue; 4728 /* form outgoing message for i-structure: 4729 buf_si[0]: nrows to be sent 4730 [1:nrows]: row index (global) 4731 [nrows+1:2*nrows+1]: i-structure index 4732 */ 4733 /*-------------------------------------------*/ 4734 nrows = len_si[proc]/2 - 1; 4735 buf_si_i = buf_si + nrows+1; 4736 buf_si[0] = nrows; 4737 buf_si_i[0] = 0; 4738 nrows = 0; 4739 for (i=owners[proc]; i<owners[proc+1]; i++) { 4740 anzi = ai[i+1] - ai[i]; 4741 if (anzi) { 4742 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4743 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4744 nrows++; 4745 } 4746 } 4747 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4748 k++; 4749 buf_si += len_si[proc]; 4750 } 4751 4752 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4753 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4754 4755 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4756 for (i=0; i<merge->nrecv; i++) { 4757 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4758 } 4759 4760 ierr = PetscFree(len_si);CHKERRQ(ierr); 4761 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4762 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4763 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4764 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4765 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4766 ierr = PetscFree(status);CHKERRQ(ierr); 4767 4768 /* compute a local seq matrix in each processor */ 4769 /*----------------------------------------------*/ 4770 /* allocate bi array and free space for accumulating nonzero column info */ 4771 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4772 bi[0] = 0; 4773 4774 /* create and initialize a linked list */ 4775 nlnk = N+1; 4776 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4777 4778 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4779 len = ai[owners[rank+1]] - ai[owners[rank]]; 4780 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4781 4782 current_space = free_space; 4783 4784 /* determine symbolic info for each local row */ 4785 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4786
4787 for (k=0; k<merge->nrecv; k++) {
4788 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4789 nrows = *buf_ri_k[k];
4790 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4791 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4792 }
4793
4794 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4795 len = 0;
4796 for (i=0; i<m; i++) {
4797 bnzi = 0;
4798 /* add local non-zero cols of this proc's seqmat into lnk */
4799 arow = owners[rank] + i;
4800 anzi = ai[arow+1] - ai[arow];
4801 aj = a->j + ai[arow];
4802 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4803 bnzi += nlnk;
4804 /* add received col data into lnk */
4805 for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4806 if (i == *nextrow[k]) { /* i-th row */
4807 anzi = *(nextai[k]+1) - *nextai[k];
4808 aj = buf_rj[k] + *nextai[k];
4809 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4810 bnzi += nlnk;
4811 nextrow[k]++; nextai[k]++;
4812 }
4813 }
4814 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4815
4816 /* if free space is not available, make more free space */
4817 if (current_space->local_remaining<bnzi) {
4818 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4819 nspacedouble++;
4820 }
4821 /* copy data into free space, then initialize lnk */
4822 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4823 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4824
4825 current_space->array += bnzi;
4826 current_space->local_used += bnzi;
4827 current_space->local_remaining -= bnzi;
4828
4829 bi[i+1] = bi[i] + bnzi;
4830 }
4831
4832 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4833
4834 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4835 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4836 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4837
4838 /* create symbolic parallel matrix B_mpi */
4839 /*---------------------------------------*/
4840 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4841 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4842 if (n==PETSC_DECIDE) {
4843 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4844 } else {
4845 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4846 }
4847 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4848 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4849 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4850 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4851 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4852
4853 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4854 B_mpi->assembled = PETSC_FALSE;
4855 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4856 merge->bi = bi;
4857 merge->bj = bj;
4858 merge->buf_ri = buf_ri;
4859 merge->buf_rj = buf_rj;
4860 merge->coi = NULL;
4861 merge->coj = NULL;
4862 merge->owners_co = NULL;
4863
4864 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4865
4866 /* attach the supporting struct to B_mpi for reuse */
4867 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4868 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4869 ierr =
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4870 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4871 *mpimat = B_mpi;
4872
4873 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4874 PetscFunctionReturn(0);
4875 }
4876
4877 #undef __FUNCT__
4878 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4879 /*@C
4880 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4881 matrices from each processor
4882
4883 Collective on MPI_Comm
4884
4885 Input Parameters:
4886 + comm - the communicator the parallel matrix will live on
4887 . seqmat - the input sequential matrix
4888 . m - number of local rows (or PETSC_DECIDE)
4889 . n - number of local columns (or PETSC_DECIDE)
4890 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4891
4892 Output Parameter:
4893 . mpimat - the parallel matrix generated
4894
4895 Level: advanced
4896
4897 Notes:
4898 The dimensions of the sequential matrix in each processor MUST be the same.
4899 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4900 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4901 @*/
4902 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4903 {
4904 PetscErrorCode ierr;
4905 PetscMPIInt size;
4906
4907 PetscFunctionBegin;
4908 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4909 if (size == 1) {
4910 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4911 if (scall == MAT_INITIAL_MATRIX) {
4912 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4913 } else {
4914 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4915 }
4916 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4917 PetscFunctionReturn(0);
4918 }
4919 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4920 if (scall == MAT_INITIAL_MATRIX) {
4921 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4922 }
4923 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4924 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4925 PetscFunctionReturn(0);
4926 }
4927
4928 #undef __FUNCT__
4929 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4930 /*@
4931 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4932 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4933 with MatGetSize()
4934
4935 Not Collective
4936
4937 Input Parameters:
4938 + A - the matrix
4939 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4940
4941 Output Parameter:
4942 .
A_loc - the local sequential matrix generated 4943 4944 Level: developer 4945 4946 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4947 4948 @*/ 4949 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4950 { 4951 PetscErrorCode ierr; 4952 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4953 Mat_SeqAIJ *mat,*a,*b; 4954 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4955 MatScalar *aa,*ba,*cam; 4956 PetscScalar *ca; 4957 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4958 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4959 PetscBool match; 4960 4961 PetscFunctionBegin; 4962 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4963 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4964 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4965 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4966 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4967 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4968 aa = a->a; ba = b->a; 4969 if (scall == MAT_INITIAL_MATRIX) { 4970 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4971 ci[0] = 0; 4972 for (i=0; i<am; i++) { 4973 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4974 } 4975 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4976 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4977 k = 0; 4978 for (i=0; i<am; i++) { 4979 ncols_o = bi[i+1] - bi[i]; 4980 ncols_d = ai[i+1] - ai[i]; 4981 /* off-diagonal portion of A */ 4982 for (jo=0; jo<ncols_o; jo++) { 4983 col = cmap[*bj]; 4984 if (col >= cstart) break; 4985 cj[k] = col; bj++; 4986 ca[k++] = *ba++; 4987 } 4988 /* diagonal portion of A */ 4989 for (j=0; j<ncols_d; j++) { 4990 cj[k] = cstart + *aj++; 4991 ca[k++] = *aa++; 4992 } 4993 /* off-diagonal portion of A */ 4994 for (j=jo; j<ncols_o; j++) { 4995 cj[k] = cmap[*bj++]; 4996 ca[k++] = *ba++; 4997 } 4998 } 4999 /* put together the new matrix */ 5000 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5001 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5002 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5003 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5004 mat->free_a = PETSC_TRUE; 5005 mat->free_ij = PETSC_TRUE; 5006 mat->nonew = 0; 5007 } else if (scall == MAT_REUSE_MATRIX) { 5008 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5009 ci = mat->i; cj = mat->j; cam = mat->a; 5010 for (i=0; i<am; i++) { 5011 /* off-diagonal portion of A */ 5012 ncols_o = bi[i+1] - bi[i]; 5013 for (jo=0; jo<ncols_o; jo++) { 5014 col = cmap[*bj]; 5015 if (col >= cstart) break; 5016 *cam++ = *ba++; bj++; 5017 } 5018 /* diagonal portion of A */ 5019 ncols_d = ai[i+1] - ai[i]; 5020 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5021 /* off-diagonal portion of A */ 5022 for (j=jo; j<ncols_o; j++) { 5023 *cam++ = *ba++; bj++; 5024 } 5025 } 5026 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5027 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5028 PetscFunctionReturn(0); 5029 } 5030 5031 #undef __FUNCT__ 5032 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5033 /*@C 5034 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5035 5036 Not Collective 5037 5038 Input Parameters: 5039 + A - the matrix 5040 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5041 - row, col - index sets of rows and columns to extract (or NULL) 5042 5043 Output Parameter: 5044 . 
A_loc - the local sequential matrix generated 5045 5046 Level: developer 5047 5048 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5049 5050 @*/ 5051 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5052 { 5053 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5054 PetscErrorCode ierr; 5055 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5056 IS isrowa,iscola; 5057 Mat *aloc; 5058 PetscBool match; 5059 5060 PetscFunctionBegin; 5061 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5062 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5063 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5064 if (!row) { 5065 start = A->rmap->rstart; end = A->rmap->rend; 5066 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5067 } else { 5068 isrowa = *row; 5069 } 5070 if (!col) { 5071 start = A->cmap->rstart; 5072 cmap = a->garray; 5073 nzA = a->A->cmap->n; 5074 nzB = a->B->cmap->n; 5075 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5076 ncols = 0; 5077 for (i=0; i<nzB; i++) { 5078 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5079 else break; 5080 } 5081 imark = i; 5082 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5083 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5084 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5085 } else { 5086 iscola = *col; 5087 } 5088 if (scall != MAT_INITIAL_MATRIX) { 5089 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5090 aloc[0] = *A_loc; 5091 } 5092 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5093 *A_loc = aloc[0]; 5094 ierr = PetscFree(aloc);CHKERRQ(ierr); 5095 if (!row) { 5096 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5097 } 5098 if (!col) { 5099 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5100 } 5101 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5102 PetscFunctionReturn(0); 5103 } 5104 5105 #undef __FUNCT__ 5106 #define __FUNCT__ "MatGetBrowsOfAcols" 5107 /*@C 5108 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5109 5110 Collective on Mat 5111 5112 Input Parameters: 5113 + A,B - the matrices in mpiaij format 5114 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5115 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5116 5117 Output Parameter: 5118 + rowb, colb - index sets of rows and columns of B to extract 5119 - B_seq - the sequential matrix generated 5120 5121 Level: developer 5122 5123 @*/ 5124 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5125 { 5126 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5127 PetscErrorCode ierr; 5128 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5129 IS isrowb,iscolb; 5130 Mat *bseq=NULL; 5131 5132 PetscFunctionBegin; 5133 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5134 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5135 } 5136 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5137 5138 if (scall == MAT_INITIAL_MATRIX) { 5139 start = A->cmap->rstart; 5140 cmap = a->garray; 5141 nzA = a->A->cmap->n; 5142 nzB = a->B->cmap->n; 5143 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5144 ncols = 0; 5145 for (i=0; i<nzB; i++) { /* row < local row index */ 5146 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5147 else break; 5148 } 5149 imark = i; 5150 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5151 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5152 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5153 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5154 } else { 5155 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5156 isrowb = *rowb; iscolb = *colb; 5157 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5158 bseq[0] = *B_seq; 5159 } 5160 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5161 *B_seq = bseq[0]; 5162 ierr = PetscFree(bseq);CHKERRQ(ierr); 5163 if (!rowb) { 5164 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5165 } else { 5166 *rowb = isrowb; 5167 } 5168 if (!colb) { 5169 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5170 } else { 5171 *colb = iscolb; 5172 } 5173 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5174 PetscFunctionReturn(0); 5175 } 5176 5177 #undef __FUNCT__ 5178 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5179 /* 5180 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5181 of the OFF-DIAGONAL portion of local A 5182 5183 Collective on Mat 5184 5185 Input Parameters: 5186 + A,B - the matrices in mpiaij format 5187 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5188 5189 Output Parameter: 5190 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5191 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5192 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5193 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5194 5195 Level: developer 5196 5197 */ 5198 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5199 { 5200 VecScatter_MPI_General *gen_to,*gen_from; 5201 PetscErrorCode ierr; 5202 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5203 Mat_SeqAIJ *b_oth; 5204 VecScatter ctx =a->Mvctx; 5205 MPI_Comm comm; 5206 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5207 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5208 PetscScalar *rvalues,*svalues; 5209 MatScalar *b_otha,*bufa,*bufA; 5210 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5211 MPI_Request *rwaits = NULL,*swaits = NULL; 5212 MPI_Status *sstatus,rstatus; 5213 PetscMPIInt jj; 5214 PetscInt *cols,sbs,rbs; 5215 PetscScalar *vals; 5216 5217 PetscFunctionBegin; 5218 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5219 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5220 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5221 } 5222 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5223 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5224 5225 gen_to = (VecScatter_MPI_General*)ctx->todata; 5226 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5227 rvalues = gen_from->values; /* holds the length of receiving row */ 5228 svalues = gen_to->values; /* holds the length of sending row */ 5229 nrecvs = gen_from->n; 5230 nsends = gen_to->n; 5231 5232 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5233 srow = gen_to->indices; /* local row index to be sent */ 5234 sstarts = gen_to->starts; 5235 sprocs = gen_to->procs; 5236 sstatus = gen_to->sstatus; 5237 sbs = gen_to->bs; 5238 rstarts = gen_from->starts; 5239 rprocs = gen_from->procs; 5240 rbs = gen_from->bs; 5241 5242 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5243 if (scall == MAT_INITIAL_MATRIX) { 5244 /* i-array */ 5245 /*---------*/ 5246 /* post receives */ 5247 for (i=0; i<nrecvs; i++) { 5248 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5249 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5250 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5251 } 5252 5253 /* pack the outgoing message */ 5254 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5255 5256 sstartsj[0] = 0; 5257 rstartsj[0] = 0; 5258 len = 0; /* total length of j or a array to be sent */ 5259 k = 0; 5260 for (i=0; i<nsends; i++) { 5261 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5262 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5263 for (j=0; j<nrows; j++) { 5264 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5265 for (l=0; l<sbs; l++) { 5266 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5267 5268 rowlen[j*sbs+l] = ncols; 5269 5270 len += ncols; 5271 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5272 } 5273 k++; 5274 } 5275 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5276 5277 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5278 } 5279 /* recvs and 
sends of i-array are completed */ 5280 i = nrecvs; 5281 while (i--) { 5282 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5283 } 5284 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5285 5286 /* allocate buffers for sending j and a arrays */ 5287 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5288 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5289 5290 /* create i-array of B_oth */ 5291 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5292 5293 b_othi[0] = 0; 5294 len = 0; /* total length of j or a array to be received */ 5295 k = 0; 5296 for (i=0; i<nrecvs; i++) { 5297 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5298 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5299 for (j=0; j<nrows; j++) { 5300 b_othi[k+1] = b_othi[k] + rowlen[j]; 5301 len += rowlen[j]; k++; 5302 } 5303 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5304 } 5305 5306 /* allocate space for j and a arrrays of B_oth */ 5307 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5308 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5309 5310 /* j-array */ 5311 /*---------*/ 5312 /* post receives of j-array */ 5313 for (i=0; i<nrecvs; i++) { 5314 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5315 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5316 } 5317 5318 /* pack the outgoing message j-array */ 5319 k = 0; 5320 for (i=0; i<nsends; i++) { 5321 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5322 bufJ = bufj+sstartsj[i]; 5323 for (j=0; j<nrows; j++) { 5324 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5325 for (ll=0; ll<sbs; ll++) { 5326 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5327 for (l=0; l<ncols; l++) { 5328 *bufJ++ = cols[l]; 5329 } 5330 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5331 } 5332 } 5333 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5334 } 5335 5336 /* recvs and sends of j-array are completed */ 5337 i = nrecvs; 5338 while (i--) { 5339 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5340 } 5341 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5342 } else if (scall == MAT_REUSE_MATRIX) { 5343 sstartsj = *startsj_s; 5344 rstartsj = *startsj_r; 5345 bufa = *bufa_ptr; 5346 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5347 b_otha = b_oth->a; 5348 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5349 5350 /* a-array */ 5351 /*---------*/ 5352 /* post receives of a-array */ 5353 for (i=0; i<nrecvs; i++) { 5354 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5355 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5356 } 5357 5358 /* pack the outgoing message a-array */ 5359 k = 0; 5360 for (i=0; i<nsends; i++) { 5361 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5362 bufA = bufa+sstartsj[i]; 5363 for (j=0; j<nrows; j++) { 5364 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5365 for (ll=0; ll<sbs; ll++) { 5366 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5367 for (l=0; l<ncols; l++) { 5368 *bufA++ = vals[l]; 5369 } 5370 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5371 } 5372 } 5373 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5374 } 5375 /* recvs and sends of a-array are completed */ 5376 i = nrecvs; 5377 while (i--) { 5378 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5379 } 5380 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5381 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5382 5383 if (scall == MAT_INITIAL_MATRIX) { 5384 /* put together the new matrix */ 5385 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5386 5387 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5388 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5389 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5390 b_oth->free_a = PETSC_TRUE; 5391 b_oth->free_ij = PETSC_TRUE; 5392 b_oth->nonew = 0; 5393 5394 ierr = PetscFree(bufj);CHKERRQ(ierr); 5395 if (!startsj_s || !bufa_ptr) { 5396 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5397 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5398 } else { 5399 *startsj_s = sstartsj; 5400 *startsj_r = rstartsj; 5401 *bufa_ptr = bufa; 5402 } 5403 } 5404 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5405 PetscFunctionReturn(0); 5406 } 5407 5408 #undef __FUNCT__ 5409 #define __FUNCT__ "MatGetCommunicationStructs" 5410 /*@C 5411 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5412 5413 Not Collective 5414 5415 Input Parameters: 5416 . A - The matrix in mpiaij format 5417 5418 Output Parameter: 5419 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5420 . colmap - A map from global column index to local index into lvec 5421 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5422 5423 Level: developer 5424 5425 @*/ 5426 #if defined(PETSC_USE_CTABLE) 5427 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5428 #else 5429 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5430 #endif 5431 { 5432 Mat_MPIAIJ *a; 5433 5434 PetscFunctionBegin; 5435 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5436 PetscValidPointer(lvec, 2); 5437 PetscValidPointer(colmap, 3); 5438 PetscValidPointer(multScatter, 4); 5439 a = (Mat_MPIAIJ*) A->data; 5440 if (lvec) *lvec = a->lvec; 5441 if (colmap) *colmap = a->colmap; 5442 if (multScatter) *multScatter = a->Mvctx; 5443 PetscFunctionReturn(0); 5444 } 5445 5446 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5447 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5448 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5449 5450 #undef __FUNCT__ 5451 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5452 /* 5453 Computes (B'*A')' since computing B*A directly is untenable 5454 5455 n p p 5456 ( ) ( ) ( ) 5457 m ( A ) * n ( B ) = m ( C ) 5458 ( ) ( ) ( ) 5459 5460 */ 5461 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5462 { 5463 PetscErrorCode ierr; 5464 Mat At,Bt,Ct; 5465 5466 PetscFunctionBegin; 5467 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5468 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5469 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5470 ierr = MatDestroy(&At);CHKERRQ(ierr); 
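/* Ct now holds Bt*At = (A*B)^T; below it is transposed into the previously created C and then destroyed */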
5471 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5472 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5473 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5474 PetscFunctionReturn(0); 5475 } 5476 5477 #undef __FUNCT__ 5478 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5479 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5480 { 5481 PetscErrorCode ierr; 5482 PetscInt m=A->rmap->n,n=B->cmap->n; 5483 Mat Cmat; 5484 5485 PetscFunctionBegin; 5486 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5487 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5488 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5489 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5490 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5491 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5492 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5493 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5494 5495 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5496 5497 *C = Cmat; 5498 PetscFunctionReturn(0); 5499 } 5500 5501 /* ----------------------------------------------------------------*/ 5502 #undef __FUNCT__ 5503 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5504 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5505 { 5506 PetscErrorCode ierr; 5507 5508 PetscFunctionBegin; 5509 if (scall == MAT_INITIAL_MATRIX) { 5510 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5511 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5512 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5513 } 5514 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5515 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5516 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5517 PetscFunctionReturn(0); 5518 } 5519 5520 #if defined(PETSC_HAVE_MUMPS) 5521 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5522 #endif 5523 #if defined(PETSC_HAVE_PASTIX) 5524 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5525 #endif 5526 #if defined(PETSC_HAVE_SUPERLU_DIST) 5527 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5528 #endif 5529 #if defined(PETSC_HAVE_CLIQUE) 5530 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5531 #endif 5532 5533 /*MC 5534 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5535 5536 Options Database Keys: 5537 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5538 5539 Level: beginner 5540 5541 .seealso: MatCreateAIJ() 5542 M*/ 5543 5544 #undef __FUNCT__ 5545 #define __FUNCT__ "MatCreate_MPIAIJ" 5546 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5547 { 5548 Mat_MPIAIJ *b; 5549 PetscErrorCode ierr; 5550 PetscMPIInt size; 5551 5552 PetscFunctionBegin; 5553 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5554 5555 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5556 B->data = (void*)b; 5557 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5558 B->assembled = PETSC_FALSE; 5559 B->insertmode = NOT_SET_VALUES; 5560 b->size = size; 5561 5562 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5563 5564 /* build cache for off array entries formed */ 5565 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5566 5567 b->donotstash = PETSC_FALSE; 5568 b->colmap = 0; 5569 b->garray = 0; 5570 b->roworiented = PETSC_TRUE; 5571 5572 /* stuff used for matrix vector multiply */ 5573 b->lvec = NULL; 5574 b->Mvctx = NULL; 5575 5576 /* stuff for MatGetRow() */ 5577 b->rowindices = 0; 5578 b->rowvalues = 0; 5579 b->getrowactive = PETSC_FALSE; 5580 5581 /* flexible pointer used in CUSP/CUSPARSE classes */ 5582 b->spptr = NULL; 5583 5584 #if defined(PETSC_HAVE_MUMPS) 5585 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5586 #endif 5587 #if defined(PETSC_HAVE_PASTIX) 5588 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5589 #endif 5590 #if defined(PETSC_HAVE_SUPERLU_DIST) 5591 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5592 #endif 5593 #if defined(PETSC_HAVE_CLIQUE) 5594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5595 #endif 5596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5607 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5610 PetscFunctionReturn(0); 5611 } 5612 5613 #undef __FUNCT__ 5614 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5615 /*@ 5616 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5617 and "off-diagonal" part of the matrix in CSR format. 5618 5619 Collective on MPI_Comm 5620 5621 Input Parameters: 5622 + comm - MPI communicator 5623 . m - number of local rows (Cannot be PETSC_DECIDE) 5624 . n - This value should be the same as the local size used in creating the 5625 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5626 calculated if N is given) For square matrices n is almost always m. 5627 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5628 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5629 . i - row indices for "diagonal" portion of matrix 5630 . j - column indices 5631 . a - matrix values 5632 . oi - row indices for "off-diagonal" portion of matrix 5633 . oj - column indices 5634 - oa - matrix values 5635 5636 Output Parameter: 5637 . mat - the matrix 5638 5639 Level: advanced 5640 5641 Notes: 5642 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5643 must free the arrays once the matrix has been destroyed and not before. 5644 5645 The i and j indices are 0 based 5646 5647 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5648 5649 This sets local rows and cannot be used to set off-processor values. 5650 5651 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5652 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5653 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5654 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5655 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5656 communication if it is known that only local entries will be set. 
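
   Example usage (a minimal illustrative sketch, not taken from the PETSc examples; it assumes exactly
   two MPI ranks, each owning one row and one local column of the global 2 by 2 matrix [2 -1; -1 2],
   and that ierr has been declared in the calling code; recall that the off-diagonal column indices in
   oj[] are global, and that the six arrays may be freed only after the matrix has been destroyed):

.vb
   Mat         A;
   PetscInt    i[]  = {0,1}, j[]  = {0}, oi[] = {0,1}, oj[1];
   PetscScalar a[]  = {2.0}, oa[] = {-1.0};
   PetscMPIInt rank;

   ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
   oj[0] = rank ? 0 : 1;
   ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
   ierr  = MatDestroy(&A);CHKERRQ(ierr);
.ve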
5657 5658 .keywords: matrix, aij, compressed row, sparse, parallel 5659 5660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5661 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5662 @*/ 5663 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5664 { 5665 PetscErrorCode ierr; 5666 Mat_MPIAIJ *maij; 5667 5668 PetscFunctionBegin; 5669 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5670 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5671 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5672 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5673 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5674 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5675 maij = (Mat_MPIAIJ*) (*mat)->data; 5676 5677 (*mat)->preallocated = PETSC_TRUE; 5678 5679 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5680 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5681 5682 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5683 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5684 5685 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5686 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5687 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5688 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5689 5690 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5691 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5692 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5693 PetscFunctionReturn(0); 5694 } 5695 5696 /* 5697 Special version for direct calls from Fortran 5698 */ 5699 #include <petsc-private/fortranimpl.h> 5700 5701 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5702 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5703 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5704 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5705 #endif 5706 5707 /* Change these macros so can be used in void function */ 5708 #undef CHKERRQ 5709 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5710 #undef SETERRQ2 5711 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5712 #undef SETERRQ3 5713 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5714 #undef SETERRQ 5715 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5716 5717 #undef __FUNCT__ 5718 #define __FUNCT__ "matsetvaluesmpiaij_" 5719 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5720 { 5721 Mat mat = *mmat; 5722 PetscInt m = *mm, n = *mn; 5723 InsertMode addv = *maddv; 5724 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5725 PetscScalar value; 5726 PetscErrorCode ierr; 5727 5728 MatCheckPreallocated(mat,1); 5729 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5730 5731 #if defined(PETSC_USE_DEBUG) 5732 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5733 #endif 5734 { 5735 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5736 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5737 PetscBool roworiented = aij->roworiented; 5738 5739 /* Some Variables required in the macro */ 5740 Mat A = aij->A; 5741 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5742 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5743 MatScalar *aa = a->a; 5744 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5745 Mat B = aij->B; 5746 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5747 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5748 MatScalar *ba = b->a; 5749 5750 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5751 PetscInt nonew = a->nonew; 5752 MatScalar *ap1,*ap2; 5753 5754 PetscFunctionBegin; 5755 for (i=0; i<m; i++) { 5756 if (im[i] < 0) continue; 5757 #if defined(PETSC_USE_DEBUG) 5758 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5759 #endif 5760 if (im[i] >= rstart && im[i] < rend) { 5761 row = im[i] - rstart; 5762 lastcol1 = -1; 5763 rp1 = aj + ai[row]; 5764 ap1 = aa + ai[row]; 5765 rmax1 = aimax[row]; 5766 nrow1 = ailen[row]; 5767 low1 = 0; 5768 high1 = nrow1; 5769 lastcol2 = -1; 5770 rp2 = bj + bi[row]; 5771 ap2 = ba + bi[row]; 5772 rmax2 = bimax[row]; 5773 nrow2 = bilen[row]; 5774 low2 = 0; 5775 high2 = nrow2; 5776 5777 for (j=0; j<n; j++) { 5778 if (roworiented) value = v[i*n+j]; 5779 else value = v[i+j*m]; 5780 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5781 if (in[j] >= cstart && in[j] < cend) { 5782 col = in[j] - cstart; 5783 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5784 } else if (in[j] < 0) continue; 5785 #if defined(PETSC_USE_DEBUG) 5786 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5787 #endif 5788 else { 5789 if (mat->was_assembled) { 5790 if (!aij->colmap) { 5791 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5792 } 5793 #if defined(PETSC_USE_CTABLE) 5794 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5795 col--; 5796 #else 5797 col = aij->colmap[in[j]] - 1; 5798 #endif 5799 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5800 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5801 col = in[j]; 5802 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5803 B = aij->B; 5804 b = (Mat_SeqAIJ*)B->data; 5805 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5806 rp2 = bj + bi[row]; 5807 ap2 = ba + bi[row]; 5808 rmax2 = bimax[row]; 5809 nrow2 = bilen[row]; 5810 low2 = 0; 5811 high2 = nrow2; 5812 bm = aij->B->rmap->n; 5813 ba = b->a; 5814 } 5815 } else col = in[j]; 5816 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5817 } 5818 } 5819 } else if (!aij->donotstash) { 5820 if (roworiented) { 5821 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5822 } else { 5823 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5824 } 5825 } 5826 } 5827 } 5828 PetscFunctionReturnVoid(); 5829 } 5830 5831