#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically
   switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
M*/
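/*
   Illustrative usage sketch (not part of the original source): the MATAIJ manual page above
   recommends calling both preallocation routines so the same code runs unchanged on one or
   many MPI processes.  The global sizes M, N and the per-row nonzero counts used below
   (5 diagonal, 2 off-diagonal) are assumed values for the sketch only.

      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);           (used when running on one process)
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);    (used when running on several processes)
*/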
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                     ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it, it is not scalable (each processor
    has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
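/*
   Illustrative sketch (not part of the original source): the two macros above insert (row,col,value)
   into a single CSR row whose column indices rp[] are kept sorted.  Stripped of the PETSc
   bookkeeping (reallocation, ignorezeroentries, nonew), the pattern is a guarded binary search
   followed by a shift-insert, roughly:

      low = 0; high = nrow;
      while (high - low > 5) {                    narrow the range by bisection while it is large
        t = (low + high)/2;
        if (rp[t] > col) high = t; else low = t;
      }
      for (i = low; i < high; i++) {              then scan the short remaining range
        if (rp[i] == col) { ap[i] = value; goto done; }
        if (rp[i] > col) break;
      }
      for (k = nrow-1; k >= i; k--) {             shift later entries up and insert the new one
        rp[k+1] = rp[k]; ap[k+1] = ap[k];
      }
      rp[i] = col; ap[i] = value; nrow++;
      done: ;
*/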
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
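/*
   Illustrative sketch (not part of the original source): typical caller-side use of the
   MatSetValues()/MatGetValues() paths implemented above.  Entries destined for rows owned by
   another process are stashed by MatSetValues_MPIAIJ() and only moved to their owners during
   assembly, so every insertion loop must be followed by MatAssemblyBegin()/MatAssemblyEnd().
   The indices and the value used here are assumed values for the sketch only.

      PetscInt    row = 0, col = 1;     global indices; the row may be owned by another process
      PetscScalar val = 2.0;

      ierr = MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/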

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscMPIInt    size    = mat->size;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
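/*
   Illustrative sketch (not part of the original source): the two routines above are what
   MatZeroRows()/MatZeroRowsColumns() dispatch to for MPIAIJ matrices.  A typical use is
   enforcing Dirichlet boundary conditions: zero the listed global rows, place diag on the
   diagonal, and (optionally) adjust the right-hand side b from a vector x of prescribed
   values.  The row list and the diagonal value below are assumed values for the sketch only.

      PetscInt    bcrows[] = {0, 7};       global row indices; any process may list any row
      PetscScalar one      = 1.0;

      ierr = MatZeroRows(A,2,bcrows,one,x,b);CHKERRQ(ierr);            (rows only)
      ierr = MatZeroRowsColumns(A,2,bcrows,one,x,b);CHKERRQ(ierr);     (rows and matching columns)
*/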

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_Redundant"
PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund = *redundant;
  PetscInt       i;

  PetscFunctionBegin;
  *redundant = NULL;
  if (redund){
    if (redund->matseq) { /* via MatGetSubMatrices() */
      ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
      ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
      ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
    } else {
      ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
      for (i=0; i<redund->nrecvs; i++) {
        ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
        ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
    }

    if (redund->psubcomm) {
      ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
    }
    ierr = PetscFree(redund);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
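/*
   Illustrative sketch (not part of the original source): MatView_MPIAIJ_Binary() above is the
   routine MatView() dispatches to when an MPIAIJ matrix is viewed with a binary viewer on more
   than one process; the file it writes can later be read back with MatLoad().  The file name
   "matrix.dat" is an assumed value for the sketch only.

      Mat         B;
      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
      ierr = MatLoad(B,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/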

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor */
*/ 1393 Mat A; 1394 Mat_SeqAIJ *Aloc; 1395 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1396 MatScalar *a; 1397 1398 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1399 if (!rank) { 1400 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1401 } else { 1402 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1403 } 1404 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1405 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1406 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1407 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1408 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1409 1410 /* copy over the A part */ 1411 Aloc = (Mat_SeqAIJ*)aij->A->data; 1412 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1413 row = mat->rmap->rstart; 1414 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1415 for (i=0; i<m; i++) { 1416 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1417 row++; 1418 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1419 } 1420 aj = Aloc->j; 1421 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1422 1423 /* copy over the B part */ 1424 Aloc = (Mat_SeqAIJ*)aij->B->data; 1425 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1426 row = mat->rmap->rstart; 1427 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1428 ct = cols; 1429 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1430 for (i=0; i<m; i++) { 1431 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1432 row++; 1433 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1434 } 1435 ierr = PetscFree(ct);CHKERRQ(ierr); 1436 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1437 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1438 /* 1439 Everyone has to call to draw the matrix since the graphics waits are 1440 synchronized across all processors that share the PetscDraw object 1441 */ 1442 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1443 if (!rank) { 1444 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1445 } 1446 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1447 ierr = MatDestroy(&A);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatView_MPIAIJ" 1454 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1455 { 1456 PetscErrorCode ierr; 1457 PetscBool iascii,isdraw,issocket,isbinary; 1458 1459 PetscFunctionBegin; 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1464 if (iascii || isdraw || isbinary || issocket) { 1465 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1466 } 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatSOR_MPIAIJ" 1472 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1473 { 1474 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1475 PetscErrorCode ierr; 1476 Vec bb1 = 0; 1477 PetscBool hasop; 1478 1479 PetscFunctionBegin; 1480 if (flag == SOR_APPLY_UPPER) { 1481 ierr 
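/* SOR_APPLY_UPPER is handled entirely by the local diagonal block mat->A */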
= (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1486 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1487 } 1488 1489 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1490 if (flag & SOR_ZERO_INITIAL_GUESS) { 1491 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1492 its--; 1493 } 1494 1495 while (its--) { 1496 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 1499 /* update rhs: bb1 = bb - B*x */ 1500 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1501 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1502 1503 /* local sweep */ 1504 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1505 } 1506 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1507 if (flag & SOR_ZERO_INITIAL_GUESS) { 1508 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1509 its--; 1510 } 1511 while (its--) { 1512 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1513 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 1515 /* update rhs: bb1 = bb - B*x */ 1516 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1517 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1518 1519 /* local sweep */ 1520 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1521 } 1522 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1523 if (flag & SOR_ZERO_INITIAL_GUESS) { 1524 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1525 its--; 1526 } 1527 while (its--) { 1528 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1529 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 1531 /* update rhs: bb1 = bb - B*x */ 1532 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1533 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1534 1535 /* local sweep */ 1536 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1537 } 1538 } else if (flag & SOR_EISENSTAT) { 1539 Vec xx1; 1540 1541 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1542 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1543 1544 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1546 if (!mat->diag) { 1547 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1548 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1549 } 1550 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1551 if (hasop) { 1552 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1553 } else { 1554 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1555 } 1556 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1557 1558 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1559 1560 /* local sweep */ 1561 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1562 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1563 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1564 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1565 1566 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatPermute_MPIAIJ" 1572 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1573 { 1574 Mat aA,aB,Aperm; 1575 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1576 PetscScalar *aa,*ba; 1577 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1578 PetscSF rowsf,sf; 1579 IS parcolp = NULL; 1580 PetscBool done; 1581 PetscErrorCode ierr; 1582 1583 PetscFunctionBegin; 1584 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1585 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1586 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1587 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1588 1589 /* Invert row permutation to find out where my rows should go */ 1590 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1591 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1592 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1593 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1594 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1595 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1596 1597 /* Invert column permutation to find out where my columns should go */ 1598 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1599 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1600 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1601 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1602 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1603 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1604 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1605 1606 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1607 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1608 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1609 1610 /* Find out where my gcols should go */ 1611 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1612 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1613 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1614 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1615 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1616 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1617 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1618 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1619 1620 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1621 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1622 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1623 for (i=0; i<m; i++) { 1624 PetscInt row = rdest[i],rowner; 1625 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1626 for (j=ai[i]; j<ai[i+1]; j++) { 1627 PetscInt cowner,col = cdest[aj[j]]; 1628 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1629 if (rowner == cowner) dnnz[i]++; 1630 
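/* permuted column owned by the same process as the permuted row: count it in the diagonal block */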
else onnz[i]++; 1631 } 1632 for (j=bi[i]; j<bi[i+1]; j++) { 1633 PetscInt cowner,col = gcdest[bj[j]]; 1634 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1635 if (rowner == cowner) dnnz[i]++; 1636 else onnz[i]++; 1637 } 1638 } 1639 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1640 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1641 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1642 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1643 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1644 1645 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1646 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1647 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) { 1649 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1650 PetscInt j0,rowlen; 1651 rowlen = ai[i+1] - ai[i]; 1652 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1653 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1654 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1655 } 1656 rowlen = bi[i+1] - bi[i]; 1657 for (j0=j=0; j<rowlen; j0=j) { 1658 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1659 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1660 } 1661 } 1662 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1663 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1664 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1665 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1666 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1667 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1668 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1669 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1670 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1671 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1672 *B = Aperm; 1673 PetscFunctionReturn(0); 1674 } 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1678 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1679 { 1680 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1681 Mat A = mat->A,B = mat->B; 1682 PetscErrorCode ierr; 1683 PetscReal isend[5],irecv[5]; 1684 1685 PetscFunctionBegin; 1686 info->block_size = 1.0; 1687 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1688 1689 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1690 isend[3] = info->memory; isend[4] = info->mallocs; 1691 1692 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1693 1694 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1695 isend[3] += info->memory; isend[4] += info->mallocs; 1696 if (flag == MAT_LOCAL) { 1697 info->nz_used = isend[0]; 1698 info->nz_allocated = isend[1]; 1699 info->nz_unneeded = isend[2]; 1700 info->memory = isend[3]; 1701 info->mallocs = isend[4]; 1702 } else if (flag == MAT_GLOBAL_MAX) { 1703 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1704 1705 info->nz_used = irecv[0]; 1706 info->nz_allocated = irecv[1]; 1707 info->nz_unneeded = irecv[2]; 1708 info->memory = irecv[3]; 1709 info->mallocs = irecv[4]; 1710 } else 
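/*
   Usage sketch (illustrative names): global totals of the local statistics gathered above
   are obtained with

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
*/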
if (flag == MAT_GLOBAL_SUM) { 1711 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1712 1713 info->nz_used = irecv[0]; 1714 info->nz_allocated = irecv[1]; 1715 info->nz_unneeded = irecv[2]; 1716 info->memory = irecv[3]; 1717 info->mallocs = irecv[4]; 1718 } 1719 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1720 info->fill_ratio_needed = 0; 1721 info->factor_mallocs = 0; 1722 PetscFunctionReturn(0); 1723 } 1724 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatSetOption_MPIAIJ" 1727 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1728 { 1729 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 switch (op) { 1734 case MAT_NEW_NONZERO_LOCATIONS: 1735 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1736 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1737 case MAT_KEEP_NONZERO_PATTERN: 1738 case MAT_NEW_NONZERO_LOCATION_ERR: 1739 case MAT_USE_INODES: 1740 case MAT_IGNORE_ZERO_ENTRIES: 1741 MatCheckPreallocated(A,1); 1742 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1744 break; 1745 case MAT_ROW_ORIENTED: 1746 a->roworiented = flg; 1747 1748 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1749 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1750 break; 1751 case MAT_NEW_DIAGONALS: 1752 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1753 break; 1754 case MAT_IGNORE_OFF_PROC_ENTRIES: 1755 a->donotstash = flg; 1756 break; 1757 case MAT_SPD: 1758 A->spd_set = PETSC_TRUE; 1759 A->spd = flg; 1760 if (flg) { 1761 A->symmetric = PETSC_TRUE; 1762 A->structurally_symmetric = PETSC_TRUE; 1763 A->symmetric_set = PETSC_TRUE; 1764 A->structurally_symmetric_set = PETSC_TRUE; 1765 } 1766 break; 1767 case MAT_SYMMETRIC: 1768 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1769 break; 1770 case MAT_STRUCTURALLY_SYMMETRIC: 1771 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1772 break; 1773 case MAT_HERMITIAN: 1774 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1775 break; 1776 case MAT_SYMMETRY_ETERNAL: 1777 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1778 break; 1779 default: 1780 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1781 } 1782 PetscFunctionReturn(0); 1783 } 1784 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatGetRow_MPIAIJ" 1787 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1788 { 1789 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1790 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1791 PetscErrorCode ierr; 1792 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1793 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1794 PetscInt *cmap,*idx_p; 1795 1796 PetscFunctionBegin; 1797 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1798 mat->getrowactive = PETSC_TRUE; 1799 1800 if (!mat->rowvalues && (idx || v)) { 1801 /* 1802 allocate enough space to hold information from the longest row. 
1803 */ 1804 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1805 PetscInt max = 1,tmp; 1806 for (i=0; i<matin->rmap->n; i++) { 1807 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1808 if (max < tmp) max = tmp; 1809 } 1810 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1811 } 1812 1813 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1814 lrow = row - rstart; 1815 1816 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1817 if (!v) {pvA = 0; pvB = 0;} 1818 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1819 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1820 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1821 nztot = nzA + nzB; 1822 1823 cmap = mat->garray; 1824 if (v || idx) { 1825 if (nztot) { 1826 /* Sort by increasing column numbers, assuming A and B already sorted */ 1827 PetscInt imark = -1; 1828 if (v) { 1829 *v = v_p = mat->rowvalues; 1830 for (i=0; i<nzB; i++) { 1831 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1832 else break; 1833 } 1834 imark = i; 1835 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1836 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1837 } 1838 if (idx) { 1839 *idx = idx_p = mat->rowindices; 1840 if (imark > -1) { 1841 for (i=0; i<imark; i++) { 1842 idx_p[i] = cmap[cworkB[i]]; 1843 } 1844 } else { 1845 for (i=0; i<nzB; i++) { 1846 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1847 else break; 1848 } 1849 imark = i; 1850 } 1851 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1852 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1853 } 1854 } else { 1855 if (idx) *idx = 0; 1856 if (v) *v = 0; 1857 } 1858 } 1859 *nz = nztot; 1860 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1861 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1862 PetscFunctionReturn(0); 1863 } 1864 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1867 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1868 { 1869 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1870 1871 PetscFunctionBegin; 1872 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1873 aij->getrowactive = PETSC_FALSE; 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatNorm_MPIAIJ" 1879 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1880 { 1881 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1882 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1883 PetscErrorCode ierr; 1884 PetscInt i,j,cstart = mat->cmap->rstart; 1885 PetscReal sum = 0.0; 1886 MatScalar *v; 1887 1888 PetscFunctionBegin; 1889 if (aij->size == 1) { 1890 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1891 } else { 1892 if (type == NORM_FROBENIUS) { 1893 v = amat->a; 1894 for (i=0; i<amat->nz; i++) { 1895 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1896 } 1897 v = bmat->a; 1898 for (i=0; i<bmat->nz; i++) { 1899 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1900 } 1901 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1902 *norm = PetscSqrtReal(*norm); 1903 } else if (type == NORM_1) { /* max column norm */ 1904 PetscReal *tmp,*tmp2; 1905 PetscInt *jj,*garray = aij->garray; 1906 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1907 ierr = 
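/* tmp accumulates the local column sums; tmp2 receives their global sum, whose maximum is the 1-norm */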
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1908 *norm = 0.0; 1909 v = amat->a; jj = amat->j; 1910 for (j=0; j<amat->nz; j++) { 1911 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1912 } 1913 v = bmat->a; jj = bmat->j; 1914 for (j=0; j<bmat->nz; j++) { 1915 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1916 } 1917 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1918 for (j=0; j<mat->cmap->N; j++) { 1919 if (tmp2[j] > *norm) *norm = tmp2[j]; 1920 } 1921 ierr = PetscFree(tmp);CHKERRQ(ierr); 1922 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1923 } else if (type == NORM_INFINITY) { /* max row norm */ 1924 PetscReal ntemp = 0.0; 1925 for (j=0; j<aij->A->rmap->n; j++) { 1926 v = amat->a + amat->i[j]; 1927 sum = 0.0; 1928 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1929 sum += PetscAbsScalar(*v); v++; 1930 } 1931 v = bmat->a + bmat->i[j]; 1932 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1933 sum += PetscAbsScalar(*v); v++; 1934 } 1935 if (sum > ntemp) ntemp = sum; 1936 } 1937 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1938 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1939 } 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatTranspose_MPIAIJ" 1945 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1946 { 1947 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1948 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1949 PetscErrorCode ierr; 1950 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1951 PetscInt cstart = A->cmap->rstart,ncol; 1952 Mat B; 1953 MatScalar *array; 1954 1955 PetscFunctionBegin; 1956 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1957 1958 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1959 ai = Aloc->i; aj = Aloc->j; 1960 bi = Bloc->i; bj = Bloc->j; 1961 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1962 PetscInt *d_nnz,*g_nnz,*o_nnz; 1963 PetscSFNode *oloc; 1964 PETSC_UNUSED PetscSF sf; 1965 1966 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1967 /* compute d_nnz for preallocation */ 1968 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1969 for (i=0; i<ai[ma]; i++) { 1970 d_nnz[aj[i]]++; 1971 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1972 } 1973 /* compute local off-diagonal contributions */ 1974 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1975 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1976 /* map those to global */ 1977 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1978 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1979 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1980 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1981 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1982 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1983 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1984 1985 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1986 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1987 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1988 ierr = 
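/* B is created with the transposed layout and preallocated from the d_nnz/o_nnz counts computed above */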
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1989 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1990 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1991 } else { 1992 B = *matout; 1993 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1994 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1995 } 1996 1997 /* copy over the A part */ 1998 array = Aloc->a; 1999 row = A->rmap->rstart; 2000 for (i=0; i<ma; i++) { 2001 ncol = ai[i+1]-ai[i]; 2002 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2003 row++; 2004 array += ncol; aj += ncol; 2005 } 2006 aj = Aloc->j; 2007 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2008 2009 /* copy over the B part */ 2010 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2011 array = Bloc->a; 2012 row = A->rmap->rstart; 2013 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2014 cols_tmp = cols; 2015 for (i=0; i<mb; i++) { 2016 ncol = bi[i+1]-bi[i]; 2017 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2018 row++; 2019 array += ncol; cols_tmp += ncol; 2020 } 2021 ierr = PetscFree(cols);CHKERRQ(ierr); 2022 2023 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2024 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2025 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2026 *matout = B; 2027 } else { 2028 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2029 } 2030 PetscFunctionReturn(0); 2031 } 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2035 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2036 { 2037 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2038 Mat a = aij->A,b = aij->B; 2039 PetscErrorCode ierr; 2040 PetscInt s1,s2,s3; 2041 2042 PetscFunctionBegin; 2043 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2044 if (rr) { 2045 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2046 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2047 /* Overlap communication with computation. 
*/ 2048 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2049 } 2050 if (ll) { 2051 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2052 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2053 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2054 } 2055 /* scale the diagonal block */ 2056 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2057 2058 if (rr) { 2059 /* Do a scatter end and then right scale the off-diagonal block */ 2060 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2061 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2062 } 2063 PetscFunctionReturn(0); 2064 } 2065 2066 #undef __FUNCT__ 2067 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2068 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2069 { 2070 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2071 PetscErrorCode ierr; 2072 2073 PetscFunctionBegin; 2074 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatEqual_MPIAIJ" 2080 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2081 { 2082 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2083 Mat a,b,c,d; 2084 PetscBool flg; 2085 PetscErrorCode ierr; 2086 2087 PetscFunctionBegin; 2088 a = matA->A; b = matA->B; 2089 c = matB->A; d = matB->B; 2090 2091 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2092 if (flg) { 2093 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2094 } 2095 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 #undef __FUNCT__ 2100 #define __FUNCT__ "MatCopy_MPIAIJ" 2101 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2102 { 2103 PetscErrorCode ierr; 2104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2105 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2106 2107 PetscFunctionBegin; 2108 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2109 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2110 /* because of the column compression in the off-processor part of the matrix a->B, 2111 the number of columns in a->B and b->B may be different, hence we cannot call 2112 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2113 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2114 then copying the submatrices */ 2115 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2116 } else { 2117 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2118 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatSetUp_MPIAIJ" 2125 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2126 { 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2136 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2137 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2138 { 2139 PetscInt i,m=Y->rmap->N; 2140 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2141 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2142 const PetscInt *xi = x->i,*yi = y->i; 2143 2144 PetscFunctionBegin; 2145 /* Set the number of nonzeros in the new matrix */ 2146 for (i=0; i<m; i++) { 2147 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2148 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2149 nnz[i] = 0; 2150 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2151 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2152 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2153 nnz[i]++; 2154 } 2155 for (; k<nzy; k++) nnz[i]++; 2156 } 2157 PetscFunctionReturn(0); 2158 } 2159 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatAXPY_MPIAIJ" 2162 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2163 { 2164 PetscErrorCode ierr; 2165 PetscInt i; 2166 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2167 PetscBLASInt bnz,one=1; 2168 Mat_SeqAIJ *x,*y; 2169 2170 PetscFunctionBegin; 2171 if (str == SAME_NONZERO_PATTERN) { 2172 PetscScalar alpha = a; 2173 x = (Mat_SeqAIJ*)xx->A->data; 2174 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2175 y = (Mat_SeqAIJ*)yy->A->data; 2176 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2177 x = (Mat_SeqAIJ*)xx->B->data; 2178 y = (Mat_SeqAIJ*)yy->B->data; 2179 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2180 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2181 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2182 } else if (str == SUBSET_NONZERO_PATTERN) { 2183 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2184 2185 x = (Mat_SeqAIJ*)xx->B->data; 2186 y = (Mat_SeqAIJ*)yy->B->data; 2187 if (y->xtoy && y->XtoY != xx->B) { 2188 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2189 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2190 } 2191 if (!y->xtoy) { /* get xtoy */ 2192 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2193 y->XtoY = xx->B; 2194 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2195 } 2196 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2197 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2198 } else { 2199 Mat B; 2200 PetscInt *nnz_d,*nnz_o; 2201 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2202 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2203 ierr = 
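/* different/unknown nonzero pattern: preallocate a fresh matrix B for the union of the two patterns,
   compute Y + a*X into it, then replace Y's data with B's via MatHeaderReplace() */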
MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2204 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2205 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2206 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2207 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2208 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2209 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2210 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2211 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2212 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2213 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2214 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2215 } 2216 PetscFunctionReturn(0); 2217 } 2218 2219 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "MatConjugate_MPIAIJ" 2223 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2224 { 2225 #if defined(PETSC_USE_COMPLEX) 2226 PetscErrorCode ierr; 2227 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2228 2229 PetscFunctionBegin; 2230 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2231 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2232 #else 2233 PetscFunctionBegin; 2234 #endif 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatRealPart_MPIAIJ" 2240 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2241 { 2242 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2243 PetscErrorCode ierr; 2244 2245 PetscFunctionBegin; 2246 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2247 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2253 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2254 { 2255 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2256 PetscErrorCode ierr; 2257 2258 PetscFunctionBegin; 2259 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2260 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2261 PetscFunctionReturn(0); 2262 } 2263 2264 #if defined(PETSC_HAVE_PBGL) 2265 2266 #include <boost/parallel/mpi/bsp_process_group.hpp> 2267 #include <boost/graph/distributed/ilu_default_graph.hpp> 2268 #include <boost/graph/distributed/ilu_0_block.hpp> 2269 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2270 #include <boost/graph/distributed/petsc/interface.hpp> 2271 #include <boost/multi_array.hpp> 2272 #include <boost/parallel/distributed_property_map->hpp> 2273 2274 #undef __FUNCT__ 2275 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2276 /* 2277 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2278 */ 2279 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2280 { 2281 namespace petsc = boost::distributed::petsc; 2282 2283 namespace graph_dist = boost::graph::distributed; 2284 using boost::graph::distributed::ilu_default::process_group_type; 2285 using boost::graph::ilu_permuted; 2286 2287 PetscBool row_identity, col_identity; 2288 PetscContainer c; 2289 PetscInt m, n, M, N; 2290 PetscErrorCode ierr; 2291 2292 PetscFunctionBegin; 2293 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2294 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2295 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2296 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be 
identity for parallel ILU"); 2297 2298 process_group_type pg; 2299 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2300 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2301 lgraph_type& level_graph = *lgraph_p; 2302 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2303 2304 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2305 ilu_permuted(level_graph); 2306 2307 /* put together the new matrix */ 2308 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2309 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2310 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2311 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2312 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2313 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2314 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2315 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2316 2317 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2318 ierr = PetscContainerSetPointer(c, lgraph_p); 2319 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2320 ierr = PetscContainerDestroy(&c); 2321 PetscFunctionReturn(0); 2322 } 2323 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2326 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2327 { 2328 PetscFunctionBegin; 2329 PetscFunctionReturn(0); 2330 } 2331 2332 #undef __FUNCT__ 2333 #define __FUNCT__ "MatSolve_MPIAIJ" 2334 /* 2335 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2336 */ 2337 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2338 { 2339 namespace graph_dist = boost::graph::distributed; 2340 2341 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2342 lgraph_type *lgraph_p; 2343 PetscContainer c; 2344 PetscErrorCode ierr; 2345 2346 PetscFunctionBegin; 2347 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2348 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2349 ierr = VecCopy(b, x);CHKERRQ(ierr); 2350 2351 PetscScalar *array_x; 2352 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2353 PetscInt sx; 2354 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2355 2356 PetscScalar *array_b; 2357 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2358 PetscInt sb; 2359 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2360 2361 lgraph_type& level_graph = *lgraph_p; 2362 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2363 2364 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2365 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2366 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2367 2368 typedef boost::iterator_property_map<array_ref_type::iterator, 2369 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2370 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2371 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2372 2373 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2374 PetscFunctionReturn(0); 2375 } 2376 #endif 2377 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2381 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2382 { 2383 PetscMPIInt rank,size; 2384 
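/*
   Interlaced case: each process exchanges its local rows with the processes that hold the
   corresponding row block in the other subcommunicators, so every subcommunicator ends up
   with a complete copy of the matrix. A usage sketch through the public entry point defined
   further below ("A" and "nred" are illustrative names):

      Mat Ared;
      ierr = MatGetRedundantMatrix(A,nred,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
*/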
MPI_Comm comm; 2385 PetscErrorCode ierr; 2386 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2387 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2388 PetscInt *rowrange = mat->rmap->range; 2389 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2390 Mat A = aij->A,B=aij->B,C=*matredundant; 2391 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2392 PetscScalar *sbuf_a; 2393 PetscInt nzlocal=a->nz+b->nz; 2394 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2395 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2396 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2397 MatScalar *aworkA,*aworkB; 2398 PetscScalar *vals; 2399 PetscMPIInt tag1,tag2,tag3,imdex; 2400 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2401 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2402 MPI_Status recv_status,*send_status; 2403 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2404 PetscInt **rbuf_j=NULL; 2405 PetscScalar **rbuf_a=NULL; 2406 Mat_Redundant *redund =NULL; 2407 2408 PetscFunctionBegin; 2409 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2410 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2411 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2412 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2413 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2414 2415 if (reuse == MAT_REUSE_MATRIX) { 2416 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2417 if (subsize == 1) { 2418 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2419 redund = c->redundant; 2420 } else { 2421 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2422 redund = c->redundant; 2423 } 2424 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2425 2426 nsends = redund->nsends; 2427 nrecvs = redund->nrecvs; 2428 send_rank = redund->send_rank; 2429 recv_rank = redund->recv_rank; 2430 sbuf_nz = redund->sbuf_nz; 2431 rbuf_nz = redund->rbuf_nz; 2432 sbuf_j = redund->sbuf_j; 2433 sbuf_a = redund->sbuf_a; 2434 rbuf_j = redund->rbuf_j; 2435 rbuf_a = redund->rbuf_a; 2436 } 2437 2438 if (reuse == MAT_INITIAL_MATRIX) { 2439 PetscInt nleftover,np_subcomm; 2440 2441 /* get the destination processors' id send_rank, nsends and nrecvs */ 2442 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2443 2444 np_subcomm = size/nsubcomm; 2445 nleftover = size - nsubcomm*np_subcomm; 2446 2447 /* block of codes below is specific for INTERLACED */ 2448 /* ------------------------------------------------*/ 2449 nsends = 0; nrecvs = 0; 2450 for (i=0; i<size; i++) { 2451 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2452 send_rank[nsends++] = i; 2453 recv_rank[nrecvs++] = i; 2454 } 2455 } 2456 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2457 i = size-nleftover-1; 2458 j = 0; 2459 while (j < nsubcomm - nleftover) { 2460 send_rank[nsends++] = i; 2461 i--; j++; 2462 } 2463 } 2464 2465 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2466 for (i=0; i<nleftover; i++) { 2467 recv_rank[nrecvs++] = size-nleftover+i; 2468 } 2469 } 2470 /*----------------------------------------------*/ 2471 2472 /* allocate sbuf_j, sbuf_a */ 2473 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2474 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2475 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2476 /* 2477 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2478 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2479 */ 2480 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2481 2482 /* copy mat's local entries into the buffers */ 2483 if (reuse == MAT_INITIAL_MATRIX) { 2484 rownz_max = 0; 2485 rptr = sbuf_j; 2486 cols = sbuf_j + rend-rstart + 1; 2487 vals = sbuf_a; 2488 rptr[0] = 0; 2489 for (i=0; i<rend-rstart; i++) { 2490 row = i + rstart; 2491 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2492 ncols = nzA + nzB; 2493 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2494 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2495 /* load the column indices for this row into cols */ 2496 lwrite = 0; 2497 for (l=0; l<nzB; l++) { 2498 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2499 vals[lwrite] = aworkB[l]; 2500 cols[lwrite++] = ctmp; 2501 } 2502 } 2503 for (l=0; l<nzA; l++) { 2504 vals[lwrite] = aworkA[l]; 2505 cols[lwrite++] = cstart + cworkA[l]; 2506 } 2507 for (l=0; l<nzB; l++) { 2508 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2509 vals[lwrite] = aworkB[l]; 2510 cols[lwrite++] = ctmp; 2511 } 2512 } 2513 vals += ncols; 2514 cols += ncols; 2515 rptr[i+1] = rptr[i] + ncols; 2516 if (rownz_max < ncols) rownz_max = ncols; 2517 } 2518 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2519 } else { /* only copy matrix values into sbuf_a */ 2520 rptr = sbuf_j; 2521 vals = sbuf_a; 2522 rptr[0] = 0; 2523 for (i=0; i<rend-rstart; i++) { 2524 row = i + rstart; 2525 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2526 ncols = nzA + nzB; 2527 cworkB = b->j + b->i[i]; 2528 aworkA = a->a + a->i[i]; 2529 aworkB = b->a + b->i[i]; 2530 lwrite = 0; 2531 for (l=0; 
l<nzB; l++) { 2532 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2533 } 2534 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2535 for (l=0; l<nzB; l++) { 2536 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2537 } 2538 vals += ncols; 2539 rptr[i+1] = rptr[i] + ncols; 2540 } 2541 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2542 2543 /* send nzlocal to others, and recv other's nzlocal */ 2544 /*--------------------------------------------------*/ 2545 if (reuse == MAT_INITIAL_MATRIX) { 2546 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2547 2548 s_waits2 = s_waits3 + nsends; 2549 s_waits1 = s_waits2 + nsends; 2550 r_waits1 = s_waits1 + nsends; 2551 r_waits2 = r_waits1 + nrecvs; 2552 r_waits3 = r_waits2 + nrecvs; 2553 } else { 2554 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2555 2556 r_waits3 = s_waits3 + nsends; 2557 } 2558 2559 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2560 if (reuse == MAT_INITIAL_MATRIX) { 2561 /* get new tags to keep the communication clean */ 2562 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2563 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2564 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2565 2566 /* post receives of other's nzlocal */ 2567 for (i=0; i<nrecvs; i++) { 2568 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2569 } 2570 /* send nzlocal to others */ 2571 for (i=0; i<nsends; i++) { 2572 sbuf_nz[i] = nzlocal; 2573 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2574 } 2575 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2576 count = nrecvs; 2577 while (count) { 2578 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2579 2580 recv_rank[imdex] = recv_status.MPI_SOURCE; 2581 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2582 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2583 2584 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2585 2586 rbuf_nz[imdex] += i + 2; 2587 2588 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2589 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2590 count--; 2591 } 2592 /* wait on sends of nzlocal */ 2593 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2594 /* send mat->i,j to others, and recv from other's */ 2595 /*------------------------------------------------*/ 2596 for (i=0; i<nsends; i++) { 2597 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2598 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2599 } 2600 /* wait on receives of mat->i,j */ 2601 /*------------------------------*/ 2602 count = nrecvs; 2603 while (count) { 2604 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2605 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2606 count--; 2607 } 2608 /* wait on sends of mat->i,j */ 2609 /*---------------------------*/ 2610 if (nsends) { 2611 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2612 } 2613 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2614 2615 /* post 
receives, send and receive mat->a */ 2616 /*----------------------------------------*/ 2617 for (imdex=0; imdex<nrecvs; imdex++) { 2618 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2619 } 2620 for (i=0; i<nsends; i++) { 2621 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2622 } 2623 count = nrecvs; 2624 while (count) { 2625 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2626 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2627 count--; 2628 } 2629 if (nsends) { 2630 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2631 } 2632 2633 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2634 2635 /* create redundant matrix */ 2636 /*-------------------------*/ 2637 if (reuse == MAT_INITIAL_MATRIX) { 2638 const PetscInt *range; 2639 PetscInt rstart_sub,rend_sub,mloc_sub; 2640 2641 /* compute rownz_max for preallocation */ 2642 for (imdex=0; imdex<nrecvs; imdex++) { 2643 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2644 rptr = rbuf_j[imdex]; 2645 for (i=0; i<j; i++) { 2646 ncols = rptr[i+1] - rptr[i]; 2647 if (rownz_max < ncols) rownz_max = ncols; 2648 } 2649 } 2650 2651 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2652 2653 /* get local size of redundant matrix 2654 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2655 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2656 rstart_sub = range[nsubcomm*subrank]; 2657 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2658 rend_sub = range[nsubcomm*(subrank+1)]; 2659 } else { 2660 rend_sub = mat->rmap->N; 2661 } 2662 mloc_sub = rend_sub - rstart_sub; 2663 2664 if (M == N) { 2665 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2666 } else { /* non-square matrix */ 2667 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2668 } 2669 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2670 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2671 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2672 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2673 } else { 2674 C = *matredundant; 2675 } 2676 2677 /* insert local matrix entries */ 2678 rptr = sbuf_j; 2679 cols = sbuf_j + rend-rstart + 1; 2680 vals = sbuf_a; 2681 for (i=0; i<rend-rstart; i++) { 2682 row = i + rstart; 2683 ncols = rptr[i+1] - rptr[i]; 2684 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2685 vals += ncols; 2686 cols += ncols; 2687 } 2688 /* insert received matrix entries */ 2689 for (imdex=0; imdex<nrecvs; imdex++) { 2690 rstart = rowrange[recv_rank[imdex]]; 2691 rend = rowrange[recv_rank[imdex]+1]; 2692 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2693 rptr = rbuf_j[imdex]; 2694 cols = rbuf_j[imdex] + rend-rstart + 1; 2695 vals = rbuf_a[imdex]; 2696 for (i=0; i<rend-rstart; i++) { 2697 row = i + rstart; 2698 ncols = rptr[i+1] - rptr[i]; 2699 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2700 vals += ncols; 2701 cols += ncols; 2702 } 2703 } 2704 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2705 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2706 2707 if (reuse == MAT_INITIAL_MATRIX) { 2708 *matredundant = C; 2709 2710 /* create a 
supporting struct and attach it to C for reuse */ 2711 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2712 if (subsize == 1) { 2713 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2714 c->redundant = redund; 2715 } else { 2716 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2717 c->redundant = redund; 2718 } 2719 2720 redund->nzlocal = nzlocal; 2721 redund->nsends = nsends; 2722 redund->nrecvs = nrecvs; 2723 redund->send_rank = send_rank; 2724 redund->recv_rank = recv_rank; 2725 redund->sbuf_nz = sbuf_nz; 2726 redund->rbuf_nz = rbuf_nz; 2727 redund->sbuf_j = sbuf_j; 2728 redund->sbuf_a = sbuf_a; 2729 redund->rbuf_j = rbuf_j; 2730 redund->rbuf_a = rbuf_a; 2731 redund->psubcomm = NULL; 2732 } 2733 PetscFunctionReturn(0); 2734 } 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2738 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2739 { 2740 PetscErrorCode ierr; 2741 MPI_Comm comm; 2742 PetscMPIInt size,subsize; 2743 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2744 Mat_Redundant *redund=NULL; 2745 PetscSubcomm psubcomm=NULL; 2746 MPI_Comm subcomm_in=subcomm; 2747 Mat *matseq; 2748 IS isrow,iscol; 2749 2750 PetscFunctionBegin; 2751 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2752 if (reuse == MAT_INITIAL_MATRIX) { 2753 /* create psubcomm, then get subcomm */ 2754 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2755 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2756 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2757 2758 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2759 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2760 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2761 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2762 subcomm = psubcomm->comm; 2763 } else { /* retrieve psubcomm and subcomm */ 2764 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2765 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2766 if (subsize == 1) { 2767 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2768 redund = c->redundant; 2769 } else { 2770 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2771 redund = c->redundant; 2772 } 2773 psubcomm = redund->psubcomm; 2774 } 2775 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2776 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2777 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2778 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2779 if (subsize == 1) { 2780 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2781 c->redundant->psubcomm = psubcomm; 2782 } else { 2783 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2784 c->redundant->psubcomm = psubcomm ; 2785 } 2786 } 2787 PetscFunctionReturn(0); 2788 } 2789 } 2790 2791 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2792 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2793 if (reuse == MAT_INITIAL_MATRIX) { 2794 /* create a local sequential matrix matseq[0] */ 2795 mloc_sub = PETSC_DECIDE; 2796 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2797 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2798 rstart = rend - mloc_sub; 2799 ierr = 
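/* contiguous (non-interlaced) case: each process extracts its block of rows (all columns) with
   MatGetSubMatrices() and the sequential pieces are concatenated over the subcommunicator below */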
ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2800 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2801 } else { /* reuse == MAT_REUSE_MATRIX */ 2802 if (subsize == 1) { 2803 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2804 redund = c->redundant; 2805 } else { 2806 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2807 redund = c->redundant; 2808 } 2809 2810 isrow = redund->isrow; 2811 iscol = redund->iscol; 2812 matseq = redund->matseq; 2813 } 2814 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2815 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2816 2817 if (reuse == MAT_INITIAL_MATRIX) { 2818 /* create a supporting struct and attach it to C for reuse */ 2819 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2820 if (subsize == 1) { 2821 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2822 c->redundant = redund; 2823 } else { 2824 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2825 c->redundant = redund; 2826 } 2827 redund->isrow = isrow; 2828 redund->iscol = iscol; 2829 redund->matseq = matseq; 2830 redund->psubcomm = psubcomm; 2831 } 2832 PetscFunctionReturn(0); 2833 } 2834 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2837 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2838 { 2839 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2840 PetscErrorCode ierr; 2841 PetscInt i,*idxb = 0; 2842 PetscScalar *va,*vb; 2843 Vec vtmp; 2844 2845 PetscFunctionBegin; 2846 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2847 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2848 if (idx) { 2849 for (i=0; i<A->rmap->n; i++) { 2850 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2851 } 2852 } 2853 2854 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2855 if (idx) { 2856 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2857 } 2858 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2859 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2860 2861 for (i=0; i<A->rmap->n; i++) { 2862 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2863 va[i] = vb[i]; 2864 if (idx) idx[i] = a->garray[idxb[i]]; 2865 } 2866 } 2867 2868 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2869 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2870 ierr = PetscFree(idxb);CHKERRQ(ierr); 2871 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2872 PetscFunctionReturn(0); 2873 } 2874 2875 #undef __FUNCT__ 2876 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2877 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2878 { 2879 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2880 PetscErrorCode ierr; 2881 PetscInt i,*idxb = 0; 2882 PetscScalar *va,*vb; 2883 Vec vtmp; 2884 2885 PetscFunctionBegin; 2886 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2887 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2888 if (idx) { 2889 for (i=0; i<A->cmap->n; i++) { 2890 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2891 } 2892 } 2893 2894 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2895 if (idx) { 2896 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2897 } 2898 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2899 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2900 2901 for (i=0; i<A->rmap->n; i++) { 2902 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2903 va[i] = vb[i]; 2904 if (idx) idx[i] = a->garray[idxb[i]]; 2905 } 2906 } 2907 2908 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2909 ierr = 
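/* v now holds, for each local row, the smaller magnitude of the diagonal-block and off-diagonal-block minima */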
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2910 ierr = PetscFree(idxb);CHKERRQ(ierr); 2911 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2912 PetscFunctionReturn(0); 2913 } 2914 2915 #undef __FUNCT__ 2916 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2917 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2918 { 2919 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2920 PetscInt n = A->rmap->n; 2921 PetscInt cstart = A->cmap->rstart; 2922 PetscInt *cmap = mat->garray; 2923 PetscInt *diagIdx, *offdiagIdx; 2924 Vec diagV, offdiagV; 2925 PetscScalar *a, *diagA, *offdiagA; 2926 PetscInt r; 2927 PetscErrorCode ierr; 2928 2929 PetscFunctionBegin; 2930 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2931 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2932 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2933 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2934 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2935 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2936 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2937 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2938 for (r = 0; r < n; ++r) { 2939 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2940 a[r] = diagA[r]; 2941 idx[r] = cstart + diagIdx[r]; 2942 } else { 2943 a[r] = offdiagA[r]; 2944 idx[r] = cmap[offdiagIdx[r]]; 2945 } 2946 } 2947 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2948 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2949 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2950 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2951 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2952 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2953 PetscFunctionReturn(0); 2954 } 2955 2956 #undef __FUNCT__ 2957 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2958 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2959 { 2960 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2961 PetscInt n = A->rmap->n; 2962 PetscInt cstart = A->cmap->rstart; 2963 PetscInt *cmap = mat->garray; 2964 PetscInt *diagIdx, *offdiagIdx; 2965 Vec diagV, offdiagV; 2966 PetscScalar *a, *diagA, *offdiagA; 2967 PetscInt r; 2968 PetscErrorCode ierr; 2969 2970 PetscFunctionBegin; 2971 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2972 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2973 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2974 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2975 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2976 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2977 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2978 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2979 for (r = 0; r < n; ++r) { 2980 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2981 a[r] = diagA[r]; 2982 idx[r] = cstart + diagIdx[r]; 2983 } else { 2984 a[r] = offdiagA[r]; 2985 idx[r] = cmap[offdiagIdx[r]]; 2986 } 2987 } 2988 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2989 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2990 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2991 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2992 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2993 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2994 PetscFunctionReturn(0); 2995 } 2996 2997 #undef __FUNCT__ 2998 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2999 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3000 { 3001 PetscErrorCode ierr; 
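/* MatGetSeqNonzeroStructure_MPIAIJ gathers the nonzero pattern of the entire parallel matrix onto
   each process as a sequential matrix; only the structure is copied (MAT_DO_NOT_GET_VALUES below),
   not the numerical values. */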
3002 Mat *dummy; 3003 3004 PetscFunctionBegin; 3005 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3006 *newmat = *dummy; 3007 ierr = PetscFree(dummy);CHKERRQ(ierr); 3008 PetscFunctionReturn(0); 3009 } 3010 3011 #undef __FUNCT__ 3012 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3013 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3014 { 3015 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3016 PetscErrorCode ierr; 3017 3018 PetscFunctionBegin; 3019 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3020 PetscFunctionReturn(0); 3021 } 3022 3023 #undef __FUNCT__ 3024 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3025 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3026 { 3027 PetscErrorCode ierr; 3028 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3029 3030 PetscFunctionBegin; 3031 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3032 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3033 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3034 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3035 PetscFunctionReturn(0); 3036 } 3037 3038 /* -------------------------------------------------------------------*/ 3039 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3040 MatGetRow_MPIAIJ, 3041 MatRestoreRow_MPIAIJ, 3042 MatMult_MPIAIJ, 3043 /* 4*/ MatMultAdd_MPIAIJ, 3044 MatMultTranspose_MPIAIJ, 3045 MatMultTransposeAdd_MPIAIJ, 3046 #if defined(PETSC_HAVE_PBGL) 3047 MatSolve_MPIAIJ, 3048 #else 3049 0, 3050 #endif 3051 0, 3052 0, 3053 /*10*/ 0, 3054 0, 3055 0, 3056 MatSOR_MPIAIJ, 3057 MatTranspose_MPIAIJ, 3058 /*15*/ MatGetInfo_MPIAIJ, 3059 MatEqual_MPIAIJ, 3060 MatGetDiagonal_MPIAIJ, 3061 MatDiagonalScale_MPIAIJ, 3062 MatNorm_MPIAIJ, 3063 /*20*/ MatAssemblyBegin_MPIAIJ, 3064 MatAssemblyEnd_MPIAIJ, 3065 MatSetOption_MPIAIJ, 3066 MatZeroEntries_MPIAIJ, 3067 /*24*/ MatZeroRows_MPIAIJ, 3068 0, 3069 #if defined(PETSC_HAVE_PBGL) 3070 0, 3071 #else 3072 0, 3073 #endif 3074 0, 3075 0, 3076 /*29*/ MatSetUp_MPIAIJ, 3077 #if defined(PETSC_HAVE_PBGL) 3078 0, 3079 #else 3080 0, 3081 #endif 3082 0, 3083 0, 3084 0, 3085 /*34*/ MatDuplicate_MPIAIJ, 3086 0, 3087 0, 3088 0, 3089 0, 3090 /*39*/ MatAXPY_MPIAIJ, 3091 MatGetSubMatrices_MPIAIJ, 3092 MatIncreaseOverlap_MPIAIJ, 3093 MatGetValues_MPIAIJ, 3094 MatCopy_MPIAIJ, 3095 /*44*/ MatGetRowMax_MPIAIJ, 3096 MatScale_MPIAIJ, 3097 0, 3098 0, 3099 MatZeroRowsColumns_MPIAIJ, 3100 /*49*/ MatSetRandom_MPIAIJ, 3101 0, 3102 0, 3103 0, 3104 0, 3105 /*54*/ MatFDColoringCreate_MPIXAIJ, 3106 0, 3107 MatSetUnfactored_MPIAIJ, 3108 MatPermute_MPIAIJ, 3109 0, 3110 /*59*/ MatGetSubMatrix_MPIAIJ, 3111 MatDestroy_MPIAIJ, 3112 MatView_MPIAIJ, 3113 0, 3114 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3115 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3116 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3117 0, 3118 0, 3119 0, 3120 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3121 MatGetRowMinAbs_MPIAIJ, 3122 0, 3123 MatSetColoring_MPIAIJ, 3124 0, 3125 MatSetValuesAdifor_MPIAIJ, 3126 /*75*/ MatFDColoringApply_AIJ, 3127 0, 3128 0, 3129 0, 3130 MatFindZeroDiagonals_MPIAIJ, 3131 /*80*/ 0, 3132 0, 3133 0, 3134 /*83*/ MatLoad_MPIAIJ, 3135 0, 3136 0, 3137 0, 3138 0, 3139 0, 3140 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3141 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3142 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3143 MatPtAP_MPIAIJ_MPIAIJ, 3144 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3145 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3146 0, 3147 0, 3148 0, 3149 0, 3150 /*99*/ 0, 3151 0, 3152 0, 3153 MatConjugate_MPIAIJ, 3154 0, 3155 
/*104*/MatSetValuesRow_MPIAIJ, 3156 MatRealPart_MPIAIJ, 3157 MatImaginaryPart_MPIAIJ, 3158 0, 3159 0, 3160 /*109*/0, 3161 MatGetRedundantMatrix_MPIAIJ, 3162 MatGetRowMin_MPIAIJ, 3163 0, 3164 0, 3165 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3166 0, 3167 0, 3168 0, 3169 0, 3170 /*119*/0, 3171 0, 3172 0, 3173 0, 3174 MatGetMultiProcBlock_MPIAIJ, 3175 /*124*/MatFindNonzeroRows_MPIAIJ, 3176 MatGetColumnNorms_MPIAIJ, 3177 MatInvertBlockDiagonal_MPIAIJ, 3178 0, 3179 MatGetSubMatricesParallel_MPIAIJ, 3180 /*129*/0, 3181 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3182 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3183 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3184 0, 3185 /*134*/0, 3186 0, 3187 0, 3188 0, 3189 0, 3190 /*139*/0, 3191 0, 3192 0, 3193 MatFDColoringSetUp_MPIXAIJ 3194 }; 3195 3196 /* ----------------------------------------------------------------------------------------*/ 3197 3198 #undef __FUNCT__ 3199 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3200 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3201 { 3202 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3203 PetscErrorCode ierr; 3204 3205 PetscFunctionBegin; 3206 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3207 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3208 PetscFunctionReturn(0); 3209 } 3210 3211 #undef __FUNCT__ 3212 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3213 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3214 { 3215 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3216 PetscErrorCode ierr; 3217 3218 PetscFunctionBegin; 3219 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3220 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3221 PetscFunctionReturn(0); 3222 } 3223 3224 #undef __FUNCT__ 3225 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3226 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3227 { 3228 Mat_MPIAIJ *b; 3229 PetscErrorCode ierr; 3230 3231 PetscFunctionBegin; 3232 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3233 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3234 b = (Mat_MPIAIJ*)B->data; 3235 3236 if (!B->preallocated) { 3237 /* Explicitly create 2 MATSEQAIJ matrices. 
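       b->A holds the diagonal block (local rows by local columns) and b->B holds the
       off-diagonal block; B is created with the full global column width here and its
       columns are compressed onto the ghost column list garray during assembly.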
*/ 3238 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3239 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3240 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3241 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3242 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3243 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3244 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3245 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3246 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3247 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3248 } 3249 3250 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3251 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3252 B->preallocated = PETSC_TRUE; 3253 PetscFunctionReturn(0); 3254 } 3255 3256 #undef __FUNCT__ 3257 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3258 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3259 { 3260 Mat mat; 3261 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3262 PetscErrorCode ierr; 3263 3264 PetscFunctionBegin; 3265 *newmat = 0; 3266 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3267 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3268 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3269 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3270 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3271 a = (Mat_MPIAIJ*)mat->data; 3272 3273 mat->factortype = matin->factortype; 3274 mat->assembled = PETSC_TRUE; 3275 mat->insertmode = NOT_SET_VALUES; 3276 mat->preallocated = PETSC_TRUE; 3277 3278 a->size = oldmat->size; 3279 a->rank = oldmat->rank; 3280 a->donotstash = oldmat->donotstash; 3281 a->roworiented = oldmat->roworiented; 3282 a->rowindices = 0; 3283 a->rowvalues = 0; 3284 a->getrowactive = PETSC_FALSE; 3285 3286 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3287 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3288 3289 if (oldmat->colmap) { 3290 #if defined(PETSC_USE_CTABLE) 3291 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3292 #else 3293 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3294 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3295 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3296 #endif 3297 } else a->colmap = 0; 3298 if (oldmat->garray) { 3299 PetscInt len; 3300 len = oldmat->B->cmap->n; 3301 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3302 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3303 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3304 } else a->garray = 0; 3305 3306 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3307 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3308 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3309 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3310 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3311 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3312 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3313 ierr = 
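/* The duplicate receives its own local work vector, scatter context and diagonal/off-diagonal
   blocks (duplicated above), so it is immediately usable, e.g. for MatMult(). */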
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3314 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3315 *newmat = mat; 3316 PetscFunctionReturn(0); 3317 } 3318 3319 3320 3321 #undef __FUNCT__ 3322 #define __FUNCT__ "MatLoad_MPIAIJ" 3323 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3324 { 3325 PetscScalar *vals,*svals; 3326 MPI_Comm comm; 3327 PetscErrorCode ierr; 3328 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3329 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3330 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3331 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3332 PetscInt cend,cstart,n,*rowners,sizesset=1; 3333 int fd; 3334 PetscInt bs = 1; 3335 3336 PetscFunctionBegin; 3337 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3338 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3339 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3340 if (!rank) { 3341 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3342 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3343 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3344 } 3345 3346 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3347 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3348 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3349 3350 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3351 3352 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3353 M = header[1]; N = header[2]; 3354 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3355 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3356 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3357 3358 /* If global sizes are set, check if they are consistent with that given in the file */ 3359 if (sizesset) { 3360 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3361 } 3362 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3363 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3364 3365 /* determine ownership of all (block) rows */ 3366 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3367 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3368 else m = newMat->rmap->n; /* Set by user */ 3369 3370 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3371 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3372 3373 /* First process needs enough room for process with most rows */ 3374 if (!rank) { 3375 mmax = rowners[1]; 3376 for (i=2; i<=size; i++) { 3377 mmax = PetscMax(mmax, rowners[i]); 3378 } 3379 } else mmax = -1; /* unused, but compilers complain */ 3380 3381 rowners[0] = 0; 3382 for (i=2; i<=size; i++) { 3383 rowners[i] += rowners[i-1]; 3384 } 3385 rstart = rowners[rank]; 3386 rend = rowners[rank+1]; 3387 3388 /* distribute row lengths to all processors */ 3389 ierr = 
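/* Layout of the binary file being read (standard PETSc binary matrix format): a four entry
   PetscInt header {MAT_FILE_CLASSID, M, N, total nonzeros}, already read above, followed by the
   M row lengths, then all column indices, then all numerical values.  Rank 0 reads each of the
   remaining blocks below and ships the pieces owned by other ranks with MPIULong_Send(). */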
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3390 if (!rank) { 3391 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3392 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3393 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3394 for (j=0; j<m; j++) { 3395 procsnz[0] += ourlens[j]; 3396 } 3397 for (i=1; i<size; i++) { 3398 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3399 /* calculate the number of nonzeros on each processor */ 3400 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3401 procsnz[i] += rowlengths[j]; 3402 } 3403 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3404 } 3405 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3406 } else { 3407 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3408 } 3409 3410 if (!rank) { 3411 /* determine max buffer needed and allocate it */ 3412 maxnz = 0; 3413 for (i=0; i<size; i++) { 3414 maxnz = PetscMax(maxnz,procsnz[i]); 3415 } 3416 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3417 3418 /* read in my part of the matrix column indices */ 3419 nz = procsnz[0]; 3420 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3421 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3422 3423 /* read in every one elses and ship off */ 3424 for (i=1; i<size; i++) { 3425 nz = procsnz[i]; 3426 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3427 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3428 } 3429 ierr = PetscFree(cols);CHKERRQ(ierr); 3430 } else { 3431 /* determine buffer space needed for message */ 3432 nz = 0; 3433 for (i=0; i<m; i++) { 3434 nz += ourlens[i]; 3435 } 3436 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3437 3438 /* receive message of column indices*/ 3439 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3440 } 3441 3442 /* determine column ownership if matrix is not square */ 3443 if (N != M) { 3444 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3445 else n = newMat->cmap->n; 3446 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3447 cstart = cend - n; 3448 } else { 3449 cstart = rstart; 3450 cend = rend; 3451 n = cend - cstart; 3452 } 3453 3454 /* loop over local rows, determining number of off diagonal entries */ 3455 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3456 jj = 0; 3457 for (i=0; i<m; i++) { 3458 for (j=0; j<ourlens[i]; j++) { 3459 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3460 jj++; 3461 } 3462 } 3463 3464 for (i=0; i<m; i++) { 3465 ourlens[i] -= offlens[i]; 3466 } 3467 if (!sizesset) { 3468 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3469 } 3470 3471 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3472 3473 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3474 3475 for (i=0; i<m; i++) { 3476 ourlens[i] += offlens[i]; 3477 } 3478 3479 if (!rank) { 3480 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3481 3482 /* read in my part of the matrix numerical values */ 3483 nz = procsnz[0]; 3484 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3485 3486 /* insert into matrix */ 3487 jj = rstart; 3488 smycols = mycols; 3489 svals = vals; 3490 for (i=0; i<m; i++) { 3491 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3492 smycols += ourlens[i]; 3493 svals += ourlens[i]; 3494 jj++; 3495 } 3496 3497 /* read in other processors and ship out */ 3498 for (i=1; i<size; i++) { 3499 nz = 
procsnz[i]; 3500 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3501 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3502 } 3503 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3504 } else { 3505 /* receive numeric values */ 3506 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3507 3508 /* receive message of values*/ 3509 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3510 3511 /* insert into matrix */ 3512 jj = rstart; 3513 smycols = mycols; 3514 svals = vals; 3515 for (i=0; i<m; i++) { 3516 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3517 smycols += ourlens[i]; 3518 svals += ourlens[i]; 3519 jj++; 3520 } 3521 } 3522 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3523 ierr = PetscFree(vals);CHKERRQ(ierr); 3524 ierr = PetscFree(mycols);CHKERRQ(ierr); 3525 ierr = PetscFree(rowners);CHKERRQ(ierr); 3526 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3527 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3528 PetscFunctionReturn(0); 3529 } 3530 3531 #undef __FUNCT__ 3532 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3533 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3534 { 3535 PetscErrorCode ierr; 3536 IS iscol_local; 3537 PetscInt csize; 3538 3539 PetscFunctionBegin; 3540 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3541 if (call == MAT_REUSE_MATRIX) { 3542 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3543 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3544 } else { 3545 PetscInt cbs; 3546 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3547 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3548 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3549 } 3550 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3551 if (call == MAT_INITIAL_MATRIX) { 3552 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3553 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3554 } 3555 PetscFunctionReturn(0); 3556 } 3557 3558 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3559 #undef __FUNCT__ 3560 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3561 /* 3562 Not great since it makes two copies of the submatrix, first an SeqAIJ 3563 in local and then by concatenating the local matrices the end result. 3564 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3565 3566 Note: This requires a sequential iscol with all indices. 
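   For reference, callers normally reach this code through the public interface, e.g.
   (illustrative sketch only):
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
       ... change values in A, keep the same index sets ...
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);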
3567 */ 3568 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3569 { 3570 PetscErrorCode ierr; 3571 PetscMPIInt rank,size; 3572 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3573 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3574 PetscBool allcolumns, colflag; 3575 Mat M,Mreuse; 3576 MatScalar *vwork,*aa; 3577 MPI_Comm comm; 3578 Mat_SeqAIJ *aij; 3579 3580 PetscFunctionBegin; 3581 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3582 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3583 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3584 3585 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3586 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3587 if (colflag && ncol == mat->cmap->N) { 3588 allcolumns = PETSC_TRUE; 3589 } else { 3590 allcolumns = PETSC_FALSE; 3591 } 3592 if (call == MAT_REUSE_MATRIX) { 3593 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3594 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3595 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3596 } else { 3597 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3598 } 3599 3600 /* 3601 m - number of local rows 3602 n - number of columns (same on all processors) 3603 rstart - first row in new global matrix generated 3604 */ 3605 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3606 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3607 if (call == MAT_INITIAL_MATRIX) { 3608 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3609 ii = aij->i; 3610 jj = aij->j; 3611 3612 /* 3613 Determine the number of non-zeros in the diagonal and off-diagonal 3614 portions of the matrix in order to do correct preallocation 3615 */ 3616 3617 /* first get start and end of "diagonal" columns */ 3618 if (csize == PETSC_DECIDE) { 3619 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3620 if (mglobal == n) { /* square matrix */ 3621 nlocal = m; 3622 } else { 3623 nlocal = n/size + ((n % size) > rank); 3624 } 3625 } else { 3626 nlocal = csize; 3627 } 3628 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3629 rstart = rend - nlocal; 3630 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3631 3632 /* next, compute all the lengths */ 3633 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3634 olens = dlens + m; 3635 for (i=0; i<m; i++) { 3636 jend = ii[i+1] - ii[i]; 3637 olen = 0; 3638 dlen = 0; 3639 for (j=0; j<jend; j++) { 3640 if (*jj < rstart || *jj >= rend) olen++; 3641 else dlen++; 3642 jj++; 3643 } 3644 olens[i] = olen; 3645 dlens[i] = dlen; 3646 } 3647 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3648 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3649 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3650 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3651 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3652 ierr = PetscFree(dlens);CHKERRQ(ierr); 3653 } else { 3654 PetscInt ml,nl; 3655 3656 M = *newmat; 3657 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3658 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3659 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3660 /* 3661 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3662 rather than the slower MatSetValues(). 3663 */ 3664 M->was_assembled = PETSC_TRUE; 3665 M->assembled = PETSC_FALSE; 3666 } 3667 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3668 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3669 ii = aij->i; 3670 jj = aij->j; 3671 aa = aij->a; 3672 for (i=0; i<m; i++) { 3673 row = rstart + i; 3674 nz = ii[i+1] - ii[i]; 3675 cwork = jj; jj += nz; 3676 vwork = aa; aa += nz; 3677 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3678 } 3679 3680 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3681 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3682 *newmat = M; 3683 3684 /* save submatrix used in processor for next request */ 3685 if (call == MAT_INITIAL_MATRIX) { 3686 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3687 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3688 } 3689 PetscFunctionReturn(0); 3690 } 3691 3692 #undef __FUNCT__ 3693 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3694 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3695 { 3696 PetscInt m,cstart, cend,j,nnz,i,d; 3697 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3698 const PetscInt *JJ; 3699 PetscScalar *values; 3700 PetscErrorCode ierr; 3701 3702 PetscFunctionBegin; 3703 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3704 3705 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3706 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3707 m = B->rmap->n; 3708 cstart = B->cmap->rstart; 3709 cend = B->cmap->rend; 3710 rstart = B->rmap->rstart; 3711 3712 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3713 3714 #if defined(PETSC_USE_DEBUG) 3715 for (i=0; i<m; i++) { 3716 nnz = Ii[i+1]- Ii[i]; 3717 JJ = J + Ii[i]; 3718 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3719 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3720 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3721 } 3722 #endif 3723 3724 for (i=0; i<m; i++) { 3725 nnz = Ii[i+1]- Ii[i]; 3726 JJ = J + Ii[i]; 3727 nnz_max = PetscMax(nnz_max,nnz); 3728 d = 0; 3729 for (j=0; j<nnz; j++) { 3730 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3731 } 3732 d_nnz[i] = d; 3733 o_nnz[i] = nnz - d; 3734 } 3735 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3736 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3737 3738 if (v) values = (PetscScalar*)v; 3739 else { 3740 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3741 } 3742 3743 for (i=0; i<m; i++) { 3744 ii = i + rstart; 3745 nnz = Ii[i+1]- Ii[i]; 3746 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3747 } 3748 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3749 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3750 3751 if (!v) { 3752 ierr = PetscFree(values);CHKERRQ(ierr); 3753 } 3754 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3755 PetscFunctionReturn(0); 3756 } 3757 3758 #undef __FUNCT__ 3759 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3760 /*@ 3761 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3762 (the default parallel PETSc format). 3763 3764 Collective on MPI_Comm 3765 3766 Input Parameters: 3767 + B - the matrix 3768 . i - the indices into j for the start of each local row (starts with zero) 3769 . j - the column indices for each local row (starts with zero) 3770 - v - optional values in the matrix 3771 3772 Level: developer 3773 3774 Notes: 3775 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3776 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3777 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3778 3779 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3780 3781 The format which is used for the sparse matrix input, is equivalent to a 3782 row-major ordering.. i.e for the following matrix, the input data expected is 3783 as shown: 3784 3785 1 0 0 3786 2 0 3 P0 3787 ------- 3788 4 5 6 P1 3789 3790 Process0 [P0]: rows_owned=[0,1] 3791 i = {0,1,3} [size = nrow+1 = 2+1] 3792 j = {0,0,2} [size = nz = 6] 3793 v = {1,2,3} [size = nz = 6] 3794 3795 Process1 [P1]: rows_owned=[2] 3796 i = {0,3} [size = nrow+1 = 1+1] 3797 j = {0,1,2} [size = nz = 6] 3798 v = {4,5,6} [size = nz = 6] 3799 3800 .keywords: matrix, aij, compressed row, sparse, parallel 3801 3802 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3803 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3804 @*/ 3805 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3806 { 3807 PetscErrorCode ierr; 3808 3809 PetscFunctionBegin; 3810 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3811 PetscFunctionReturn(0); 3812 } 3813 3814 #undef __FUNCT__ 3815 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3816 /*@C 3817 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3818 (the default parallel PETSc format). For good matrix assembly performance 3819 the user should preallocate the matrix storage by setting the parameters 3820 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3821 performance can be increased by more than a factor of 50. 3822 3823 Collective on MPI_Comm 3824 3825 Input Parameters: 3826 + A - the matrix 3827 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3828 (same value is used for all local rows) 3829 . d_nnz - array containing the number of nonzeros in the various rows of the 3830 DIAGONAL portion of the local submatrix (possibly different for each row) 3831 or NULL, if d_nz is used to specify the nonzero structure. 3832 The size of this array is equal to the number of local rows, i.e 'm'. 
3833 For matrices that will be factored, you must leave room for (and set) 3834 the diagonal entry even if it is zero. 3835 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3836 submatrix (same value is used for all local rows). 3837 - o_nnz - array containing the number of nonzeros in the various rows of the 3838 OFF-DIAGONAL portion of the local submatrix (possibly different for 3839 each row) or NULL, if o_nz is used to specify the nonzero 3840 structure. The size of this array is equal to the number 3841 of local rows, i.e 'm'. 3842 3843 If the *_nnz parameter is given then the *_nz parameter is ignored 3844 3845 The AIJ format (also called the Yale sparse matrix format or 3846 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3847 storage. The stored row and column indices begin with zero. 3848 See Users-Manual: ch_mat for details. 3849 3850 The parallel matrix is partitioned such that the first m0 rows belong to 3851 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3852 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3853 3854 The DIAGONAL portion of the local submatrix of a processor can be defined 3855 as the submatrix which is obtained by extraction the part corresponding to 3856 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3857 first row that belongs to the processor, r2 is the last row belonging to 3858 the this processor, and c1-c2 is range of indices of the local part of a 3859 vector suitable for applying the matrix to. This is an mxn matrix. In the 3860 common case of a square matrix, the row and column ranges are the same and 3861 the DIAGONAL part is also square. The remaining portion of the local 3862 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3863 3864 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3865 3866 You can call MatGetInfo() to get information on how effective the preallocation was; 3867 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3868 You can also run with the option -info and look for messages with the string 3869 malloc in them to see if additional memory allocation was needed. 3870 3871 Example usage: 3872 3873 Consider the following 8x8 matrix with 34 non-zero values, that is 3874 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3875 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3876 as follows: 3877 3878 .vb 3879 1 2 0 | 0 3 0 | 0 4 3880 Proc0 0 5 6 | 7 0 0 | 8 0 3881 9 0 10 | 11 0 0 | 12 0 3882 ------------------------------------- 3883 13 0 14 | 15 16 17 | 0 0 3884 Proc1 0 18 0 | 19 20 21 | 0 0 3885 0 0 0 | 22 23 0 | 24 0 3886 ------------------------------------- 3887 Proc2 25 26 27 | 0 0 28 | 29 0 3888 30 0 0 | 31 32 33 | 0 34 3889 .ve 3890 3891 This can be represented as a collection of submatrices as: 3892 3893 .vb 3894 A B C 3895 D E F 3896 G H I 3897 .ve 3898 3899 Where the submatrices A,B,C are owned by proc0, D,E,F are 3900 owned by proc1, G,H,I are owned by proc2. 3901 3902 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3903 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3904 The 'M','N' parameters are 8,8, and have the same values on all procs. 3905 3906 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3907 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3908 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3909 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3910 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3911 matrix, and [DF] as another SeqAIJ matrix. 3912 3913 When d_nz, o_nz parameters are specified, d_nz storage elements are 3914 allocated for every row of the local diagonal submatrix, and o_nz 3915 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3916 One way to choose d_nz and o_nz is to use the max nonzeros per local 3917 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3918 In this case, the values of d_nz,o_nz are: 3919 .vb 3920 proc0 : dnz = 2, o_nz = 2 3921 proc1 : dnz = 3, o_nz = 2 3922 proc2 : dnz = 1, o_nz = 4 3923 .ve 3924 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3925 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3926 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3927 34 values. 3928 3929 When d_nnz, o_nnz parameters are specified, the storage is specified 3930 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3931 In the above case the values for d_nnz,o_nnz are: 3932 .vb 3933 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3934 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3935 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3936 .ve 3937 Here the space allocated is the sum of all the above values, i.e. 34, and 3938 hence pre-allocation is perfect. 3939 3940 Level: intermediate 3941 3942 .keywords: matrix, aij, compressed row, sparse, parallel 3943 3944 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3945 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3946 @*/ 3947 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3948 { 3949 PetscErrorCode ierr; 3950 3951 PetscFunctionBegin; 3952 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3953 PetscValidType(B,1); 3954 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3955 PetscFunctionReturn(0); 3956 } 3957 3958 #undef __FUNCT__ 3959 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3960 /*@ 3961 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard 3962 CSR format the local rows. 3963 3964 Collective on MPI_Comm 3965 3966 Input Parameters: 3967 + comm - MPI communicator 3968 . m - number of local rows (Cannot be PETSC_DECIDE) 3969 . n - This value should be the same as the local size used in creating the 3970 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3971 calculated if N is given) For square matrices n is almost always m. 3972 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3973 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3974 . i - row indices 3975 . j - column indices 3976 - a - matrix values 3977 3978 Output Parameter: 3979 . mat - the matrix 3980 3981 Level: intermediate 3982 3983 Notes: 3984 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3985 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3986 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3987 3988 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
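   A minimal calling sequence (added sketch, not part of the original page; it uses the
   two-process example shown in the next paragraph, error checking abbreviated) on rank 0,
   which owns the first two rows of the 3x3 matrix:
.vb
      Mat         A;
      PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
      PetscScalar a[] = {1,2,3};
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve
   Rank 1 makes the same call with m = 1, i = {0,3}, j = {0,1,2}, a = {4,5,6}.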
3989 3990 The format which is used for the sparse matrix input, is equivalent to a 3991 row-major ordering.. i.e for the following matrix, the input data expected is 3992 as shown: 3993 3994 1 0 0 3995 2 0 3 P0 3996 ------- 3997 4 5 6 P1 3998 3999 Process0 [P0]: rows_owned=[0,1] 4000 i = {0,1,3} [size = nrow+1 = 2+1] 4001 j = {0,0,2} [size = nz = 6] 4002 v = {1,2,3} [size = nz = 6] 4003 4004 Process1 [P1]: rows_owned=[2] 4005 i = {0,3} [size = nrow+1 = 1+1] 4006 j = {0,1,2} [size = nz = 6] 4007 v = {4,5,6} [size = nz = 6] 4008 4009 .keywords: matrix, aij, compressed row, sparse, parallel 4010 4011 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4012 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4013 @*/ 4014 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4015 { 4016 PetscErrorCode ierr; 4017 4018 PetscFunctionBegin; 4019 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4020 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4021 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4022 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4023 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4024 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4025 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4026 PetscFunctionReturn(0); 4027 } 4028 4029 #undef __FUNCT__ 4030 #define __FUNCT__ "MatCreateAIJ" 4031 /*@C 4032 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4033 (the default parallel PETSc format). For good matrix assembly performance 4034 the user should preallocate the matrix storage by setting the parameters 4035 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4036 performance can be increased by more than a factor of 50. 4037 4038 Collective on MPI_Comm 4039 4040 Input Parameters: 4041 + comm - MPI communicator 4042 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4043 This value should be the same as the local size used in creating the 4044 y vector for the matrix-vector product y = Ax. 4045 . n - This value should be the same as the local size used in creating the 4046 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4047 calculated if N is given) For square matrices n is almost always m. 4048 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4049 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4050 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4051 (same value is used for all local rows) 4052 . d_nnz - array containing the number of nonzeros in the various rows of the 4053 DIAGONAL portion of the local submatrix (possibly different for each row) 4054 or NULL, if d_nz is used to specify the nonzero structure. 4055 The size of this array is equal to the number of local rows, i.e 'm'. 4056 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4057 submatrix (same value is used for all local rows). 4058 - o_nnz - array containing the number of nonzeros in the various rows of the 4059 OFF-DIAGONAL portion of the local submatrix (possibly different for 4060 each row) or NULL, if o_nz is used to specify the nonzero 4061 structure. 
The size of this array is equal to the number 4062 of local rows, i.e 'm'. 4063 4064 Output Parameter: 4065 . A - the matrix 4066 4067 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4068 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4069 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4070 4071 Notes: 4072 If the *_nnz parameter is given then the *_nz parameter is ignored 4073 4074 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4075 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4076 storage requirements for this matrix. 4077 4078 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4079 processor than it must be used on all processors that share the object for 4080 that argument. 4081 4082 The user MUST specify either the local or global matrix dimensions 4083 (possibly both). 4084 4085 The parallel matrix is partitioned across processors such that the 4086 first m0 rows belong to process 0, the next m1 rows belong to 4087 process 1, the next m2 rows belong to process 2 etc.. where 4088 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4089 values corresponding to [m x N] submatrix. 4090 4091 The columns are logically partitioned with the n0 columns belonging 4092 to 0th partition, the next n1 columns belonging to the next 4093 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4094 4095 The DIAGONAL portion of the local submatrix on any given processor 4096 is the submatrix corresponding to the rows and columns m,n 4097 corresponding to the given processor. i.e diagonal matrix on 4098 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4099 etc. The remaining portion of the local submatrix [m x (N-n)] 4100 constitute the OFF-DIAGONAL portion. The example below better 4101 illustrates this concept. 4102 4103 For a square global matrix we define each processor's diagonal portion 4104 to be its local rows and the corresponding columns (a square submatrix); 4105 each processor's off-diagonal portion encompasses the remainder of the 4106 local matrix (a rectangular submatrix). 4107 4108 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4109 4110 When calling this routine with a single process communicator, a matrix of 4111 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4112 type of communicator, use the construction mechanism: 4113 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4114 4115 By default, this format uses inodes (identical nodes) when possible. 4116 We search for consecutive rows with the same nonzero structure, thereby 4117 reusing matrix information to achieve increased efficiency. 4118 4119 Options Database Keys: 4120 + -mat_no_inode - Do not use inodes 4121 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4122 - -mat_aij_oneindex - Internally use indexing starting at 1 4123 rather than 0. Note that when calling MatSetValues(), 4124 the user still MUST index entries starting at 0! 4125 4126 4127 Example usage: 4128 4129 Consider the following 8x8 matrix with 34 non-zero values, that is 4130 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4131 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4132 as follows: 4133 4134 .vb 4135 1 2 0 | 0 3 0 | 0 4 4136 Proc0 0 5 6 | 7 0 0 | 8 0 4137 9 0 10 | 11 0 0 | 12 0 4138 ------------------------------------- 4139 13 0 14 | 15 16 17 | 0 0 4140 Proc1 0 18 0 | 19 20 21 | 0 0 4141 0 0 0 | 22 23 0 | 24 0 4142 ------------------------------------- 4143 Proc2 25 26 27 | 0 0 28 | 29 0 4144 30 0 0 | 31 32 33 | 0 34 4145 .ve 4146 4147 This can be represented as a collection of submatrices as: 4148 4149 .vb 4150 A B C 4151 D E F 4152 G H I 4153 .ve 4154 4155 Where the submatrices A,B,C are owned by proc0, D,E,F are 4156 owned by proc1, G,H,I are owned by proc2. 4157 4158 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4159 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4160 The 'M','N' parameters are 8,8, and have the same values on all procs. 4161 4162 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4163 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4164 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4165 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4166 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4167 matrix, and [DF] as another SeqAIJ matrix. 4168 4169 When d_nz, o_nz parameters are specified, d_nz storage elements are 4170 allocated for every row of the local diagonal submatrix, and o_nz 4171 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4172 One way to choose d_nz and o_nz is to use the max nonzeros per local 4173 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4174 In this case, the values of d_nz,o_nz are: 4175 .vb 4176 proc0 : dnz = 2, o_nz = 2 4177 proc1 : dnz = 3, o_nz = 2 4178 proc2 : dnz = 1, o_nz = 4 4179 .ve 4180 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4181 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4182 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4183 34 values. 4184 4185 When d_nnz, o_nnz parameters are specified, the storage is specified 4186 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4187 In the above case the values for d_nnz,o_nnz are: 4188 .vb 4189 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4190 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4191 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4192 .ve 4193 Here the space allocated is the sum of all the above values, i.e. 34, and 4194 hence pre-allocation is perfect.
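   As an added illustration (sketch, not part of the original example; error checking
   abbreviated), the matrix above can be created with exact preallocation by having each
   process pass its own sizes and nnz arrays. On proc0 this is
.vb
      Mat      A;
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   while proc1 passes m = n = 3 and proc2 passes m = n = 2, each with its own d_nnz/o_nnz
   from the lists above.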
4195 4196 Level: intermediate 4197 4198 .keywords: matrix, aij, compressed row, sparse, parallel 4199 4200 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4201 MPIAIJ, MatCreateMPIAIJWithArrays() 4202 @*/ 4203 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4204 { 4205 PetscErrorCode ierr; 4206 PetscMPIInt size; 4207 4208 PetscFunctionBegin; 4209 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4210 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4211 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4212 if (size > 1) { 4213 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4214 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4215 } else { 4216 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4217 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4218 } 4219 PetscFunctionReturn(0); 4220 } 4221 4222 #undef __FUNCT__ 4223 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4224 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4225 { 4226 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4227 4228 PetscFunctionBegin; 4229 *Ad = a->A; 4230 *Ao = a->B; 4231 *colmap = a->garray; 4232 PetscFunctionReturn(0); 4233 } 4234 4235 #undef __FUNCT__ 4236 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4237 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4238 { 4239 PetscErrorCode ierr; 4240 PetscInt i; 4241 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4242 4243 PetscFunctionBegin; 4244 if (coloring->ctype == IS_COLORING_GLOBAL) { 4245 ISColoringValue *allcolors,*colors; 4246 ISColoring ocoloring; 4247 4248 /* set coloring for diagonal portion */ 4249 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4250 4251 /* set coloring for off-diagonal portion */ 4252 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4253 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4254 for (i=0; i<a->B->cmap->n; i++) { 4255 colors[i] = allcolors[a->garray[i]]; 4256 } 4257 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4258 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4259 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4260 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4261 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4262 ISColoringValue *colors; 4263 PetscInt *larray; 4264 ISColoring ocoloring; 4265 4266 /* set coloring for diagonal portion */ 4267 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4268 for (i=0; i<a->A->cmap->n; i++) { 4269 larray[i] = i + A->cmap->rstart; 4270 } 4271 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4272 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4273 for (i=0; i<a->A->cmap->n; i++) { 4274 colors[i] = coloring->colors[larray[i]]; 4275 } 4276 ierr = PetscFree(larray);CHKERRQ(ierr); 4277 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4278 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4279 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4280 4281 /* set coloring for off-diagonal portion */ 4282 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4283 ierr = 
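/* a->garray[] lists the global column numbers of the off-diagonal block; the global-to-local
   mapping of the column layout converts them to ghosted local indices, which is the numbering
   in which an IS_COLORING_GHOSTED coloring stores its colors. */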
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4284 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4285 for (i=0; i<a->B->cmap->n; i++) { 4286 colors[i] = coloring->colors[larray[i]]; 4287 } 4288 ierr = PetscFree(larray);CHKERRQ(ierr); 4289 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4290 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4291 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4292 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4293 PetscFunctionReturn(0); 4294 } 4295 4296 #undef __FUNCT__ 4297 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4298 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4299 { 4300 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4301 PetscErrorCode ierr; 4302 4303 PetscFunctionBegin; 4304 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4305 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4306 PetscFunctionReturn(0); 4307 } 4308 4309 #undef __FUNCT__ 4310 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4311 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4312 { 4313 PetscErrorCode ierr; 4314 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4315 PetscInt *indx; 4316 4317 PetscFunctionBegin; 4318 /* This routine will ONLY return MPIAIJ type matrix */ 4319 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4320 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4321 if (n == PETSC_DECIDE) { 4322 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4323 } 4324 /* Check sum(n) = N */ 4325 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4326 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4327 4328 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4329 rstart -= m; 4330 4331 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4332 for (i=0; i<m; i++) { 4333 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4334 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4335 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4336 } 4337 4338 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4339 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4340 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4341 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4342 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4343 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4344 PetscFunctionReturn(0); 4345 } 4346 4347 #undef __FUNCT__ 4348 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4349 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4350 { 4351 PetscErrorCode ierr; 4352 PetscInt m,N,i,rstart,nnz,Ii; 4353 PetscInt *indx; 4354 PetscScalar *values; 4355 4356 PetscFunctionBegin; 4357 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4358 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4359 for (i=0; i<m; i++) { 4360 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4361 Ii = i + rstart; 4362 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4363 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4364 } 4365 ierr = 
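/* Every local row i of the sequential matrix has been inserted as global row rstart+i of the
   parallel matrix; the standard assembly below completes the concatenation. */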
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4366 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4367 PetscFunctionReturn(0); 4368 } 4369 4370 #undef __FUNCT__ 4371 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4372 /*@ 4373 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4374 matrices from each processor 4375 4376 Collective on MPI_Comm 4377 4378 Input Parameters: 4379 + comm - the communicators the parallel matrix will live on 4380 . inmat - the input sequential matrices 4381 . n - number of local columns (or PETSC_DECIDE) 4382 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4383 4384 Output Parameter: 4385 . outmat - the parallel matrix generated 4386 4387 Level: advanced 4388 4389 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4390 4391 @*/ 4392 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4393 { 4394 PetscErrorCode ierr; 4395 PetscMPIInt size; 4396 4397 PetscFunctionBegin; 4398 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4399 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4400 if (size == 1) { 4401 if (scall == MAT_INITIAL_MATRIX) { 4402 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4403 } else { 4404 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4405 } 4406 } else { 4407 if (scall == MAT_INITIAL_MATRIX) { 4408 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4409 } 4410 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4411 } 4412 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4413 PetscFunctionReturn(0); 4414 } 4415 4416 #undef __FUNCT__ 4417 #define __FUNCT__ "MatFileSplit" 4418 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4419 { 4420 PetscErrorCode ierr; 4421 PetscMPIInt rank; 4422 PetscInt m,N,i,rstart,nnz; 4423 size_t len; 4424 const PetscInt *indx; 4425 PetscViewer out; 4426 char *name; 4427 Mat B; 4428 const PetscScalar *values; 4429 4430 PetscFunctionBegin; 4431 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4432 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4433 /* Should this be the type of the diagonal block of A? 
*/ 4434 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4435 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4436 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4437 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4438 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4439 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4440 for (i=0; i<m; i++) { 4441 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4442 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4443 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4444 } 4445 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4446 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4447 4448 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4449 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4450 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4451 sprintf(name,"%s.%d",outfile,rank); 4452 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4453 ierr = PetscFree(name);CHKERRQ(ierr); 4454 ierr = MatView(B,out);CHKERRQ(ierr); 4455 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4456 ierr = MatDestroy(&B);CHKERRQ(ierr); 4457 PetscFunctionReturn(0); 4458 } 4459 4460 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4461 #undef __FUNCT__ 4462 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4463 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4464 { 4465 PetscErrorCode ierr; 4466 Mat_Merge_SeqsToMPI *merge; 4467 PetscContainer container; 4468 4469 PetscFunctionBegin; 4470 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4471 if (container) { 4472 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4473 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4474 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4476 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4477 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4485 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4486 ierr = PetscFree(merge);CHKERRQ(ierr); 4487 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4488 } 4489 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4490 PetscFunctionReturn(0); 4491 } 4492 4493 #include <../src/mat/utils/freespace.h> 4494 #include <petscbt.h> 4495 4496 #undef __FUNCT__ 4497 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4498 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4499 { 4500 PetscErrorCode ierr; 4501 MPI_Comm comm; 4502 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4503 PetscMPIInt size,rank,taga,*len_s; 4504 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4505 PetscInt proc,m; 4506 PetscInt **buf_ri,**buf_rj; 4507 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4508 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4509 MPI_Request *s_waits,*r_waits; 4510 MPI_Status *status; 4511 MatScalar *aa=a->a; 4512 MatScalar **abuf_r,*ba_i; 4513 Mat_Merge_SeqsToMPI *merge; 4514 PetscContainer container; 4515 4516 PetscFunctionBegin; 4517 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4518 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4519 4520 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4521 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4522 4523 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4524 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4525 4526 bi = merge->bi; 4527 bj = merge->bj; 4528 buf_ri = merge->buf_ri; 4529 buf_rj = merge->buf_rj; 4530 4531 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4532 owners = merge->rowmap->range; 4533 len_s = merge->len_s; 4534 4535 /* send and recv matrix values */ 4536 /*-----------------------------*/ 4537 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4538 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4539 4540 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4541 for (proc=0,k=0; proc<size; proc++) { 4542 if (!len_s[proc]) continue; 4543 i = owners[proc]; 4544 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4545 k++; 4546 } 4547 4548 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4549 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4550 ierr = PetscFree(status);CHKERRQ(ierr); 4551 4552 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4553 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4554 4555 /* insert mat values of mpimat */ 4556 /*----------------------------*/ 4557 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4558 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4559 4560 for (k=0; k<merge->nrecv; k++) { 4561 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4562 nrows = *(buf_ri_k[k]); 4563 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4564 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4565 } 4566 4567 /* set values of ba */ 4568 m = merge->rowmap->n; 4569 for (i=0; i<m; i++) { 4570 arow = owners[rank] + i; 4571 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4572 bnzi = bi[i+1] - bi[i]; 4573 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4574 4575 /* add local non-zero vals of this proc's seqmat into ba */ 4576 anzi = ai[arow+1] - ai[arow]; 4577 aj = a->j + ai[arow]; 4578 aa = a->a + ai[arow]; 4579 nextaj = 0; 4580 for (j=0; nextaj<anzi; j++) { 4581 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4582 ba_i[j] += aa[nextaj++]; 4583 } 4584 } 4585 4586 /* add received vals into ba */ 4587 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4588 /* i-th row */ 4589 if (i == *nextrow[k]) { 4590 anzi = *(nextai[k]+1) - *nextai[k]; 4591 aj = buf_rj[k] + *(nextai[k]); 4592 aa = abuf_r[k] + *(nextai[k]); 4593 nextaj = 0; 4594 for (j=0; nextaj<anzi; j++) { 4595 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4596 ba_i[j] += aa[nextaj++]; 4597 } 4598 } 4599 nextrow[k]++; nextai[k]++; 4600 } 4601 } 4602 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4603 } 4604 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4605 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4606 4607 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4608 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4609 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4610 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4611 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4612 PetscFunctionReturn(0); 4613 } 4614 4615 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4616 4617 #undef __FUNCT__ 4618 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4619 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4620 { 4621 PetscErrorCode ierr; 4622 Mat B_mpi; 4623 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4624 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4625 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4626 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4627 PetscInt len,proc,*dnz,*onz,bs,cbs; 4628 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4629 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4630 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4631 MPI_Status *status; 4632 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4633 PetscBT lnkbt; 4634 Mat_Merge_SeqsToMPI *merge; 4635 PetscContainer container; 4636 4637 PetscFunctionBegin; 4638 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4639 4640 /* make sure it is a PETSc comm */ 4641 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4642 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4643 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4644 4645 ierr = PetscNew(&merge);CHKERRQ(ierr); 4646 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4647 4648 /* determine row ownership */ 4649 /*---------------------------------------------------------*/ 4650 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4651 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4652 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4653 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4654 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4655 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4656 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4657 4658 m = merge->rowmap->n; 4659 owners = merge->rowmap->range; 4660 4661 /* determine the number of messages to send, their lengths */ 4662 /*---------------------------------------------------------*/ 4663 len_s = merge->len_s; 4664 4665 len = 0; /* length of buf_si[] */ 4666 merge->nsend = 0; 4667 for (proc=0; proc<size; proc++) { 4668 len_si[proc] = 0; 4669 if (proc == rank) { 4670 len_s[proc] = 0; 4671 } else { 4672 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4673 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4674 } 4675 if (len_s[proc]) { 4676 merge->nsend++; 4677 nrows = 0; 4678 for (i=owners[proc]; i<owners[proc+1]; i++) { 4679 if (ai[i+1] > ai[i]) nrows++; 4680 } 4681 len_si[proc] = 2*(nrows+1); 4682 len += len_si[proc]; 4683 } 4684 } 4685 4686 /* determine the number and length of messages to receive for ij-structure */ 4687 /*-------------------------------------------------------------------------*/ 4688 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4689 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4690 4691 /* post the Irecv of j-structure */ 4692 /*-------------------------------*/ 4693 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4694 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4695 4696 /* post the Isend of j-structure */ 4697 /*--------------------------------*/ 4698 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4699 4700 for (proc=0, k=0; proc<size; proc++) { 4701 if (!len_s[proc]) continue; 4702 i = owners[proc]; 4703 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4704 k++; 4705 } 4706 4707 /* receives and sends of j-structure are complete */ 4708 /*------------------------------------------------*/ 4709 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4710 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4711 4712 /* send and recv i-structure */ 4713 /*---------------------------*/ 4714 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4715 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4716 4717 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4718 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4719 for (proc=0,k=0; proc<size; proc++) { 4720 if (!len_s[proc]) continue; 4721 /* form outgoing message for i-structure: 4722 buf_si[0]: nrows to be sent 4723 [1:nrows]: row index (global) 4724 [nrows+1:2*nrows+1]: i-structure index 4725 */ 4726 /*-------------------------------------------*/ 4727 nrows = len_si[proc]/2 - 1; 4728 buf_si_i = buf_si + nrows+1; 4729 buf_si[0] = nrows; 4730 buf_si_i[0] = 0; 4731 nrows = 0; 4732 for (i=owners[proc]; i<owners[proc+1]; i++) { 4733 anzi = ai[i+1] - ai[i]; 4734 if (anzi) { 4735 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4736 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4737 nrows++; 4738 } 4739 } 4740 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4741 k++; 4742 buf_si += len_si[proc]; 4743 } 4744 4745 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4746 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4747 4748 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4749 for (i=0; i<merge->nrecv; i++) { 4750 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4751 } 4752 4753 ierr = PetscFree(len_si);CHKERRQ(ierr); 4754 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4755 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4756 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4757 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4758 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4759 ierr = PetscFree(status);CHKERRQ(ierr); 4760 4761 /* compute a local seq matrix in each processor */ 4762 /*----------------------------------------------*/ 4763 /* allocate bi array and free space for accumulating nonzero column info */ 4764 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4765 bi[0] = 0; 4766 4767 /* create and initialize a linked list */ 4768 nlnk = N+1; 4769 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4770 4771 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4772 len = ai[owners[rank+1]] - ai[owners[rank]]; 4773 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4774 4775 current_space = free_space; 4776 4777 /* determine symbolic info for each local row */ 4778 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4779 4780 for (k=0; k<merge->nrecv; k++) { 4781 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4782 nrows = *buf_ri_k[k]; 4783 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4784 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4785 } 4786 4787 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4788 len = 0; 4789 for (i=0; i<m; i++) { 4790 bnzi = 0; 4791 /* add local non-zero cols of this proc's seqmat into lnk */ 4792 arow = owners[rank] + i; 4793 anzi = ai[arow+1] - ai[arow]; 4794 aj = a->j + ai[arow]; 4795 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4796 bnzi += nlnk; 4797 /* add received col data into lnk */ 4798 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4799 if (i == *nextrow[k]) { /* i-th row */ 4800 anzi = *(nextai[k]+1) - *nextai[k]; 4801 aj = buf_rj[k] + *nextai[k]; 4802 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4803 bnzi += nlnk; 4804 nextrow[k]++; nextai[k]++; 4805 } 4806 } 4807 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4808 4809 /* if free space is not available, make more free space */ 4810 if (current_space->local_remaining<bnzi) { 4811 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4812 nspacedouble++; 4813 } 4814 /* copy data into free space, then initialize lnk */ 4815 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4816 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4817 4818 current_space->array += bnzi; 4819 current_space->local_used += bnzi; 4820 current_space->local_remaining -= bnzi; 4821 4822 bi[i+1] = bi[i] + bnzi; 4823 } 4824 4825 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4826 4827 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4828 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4829 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4830 4831 /* create symbolic parallel matrix B_mpi */ 4832 /*---------------------------------------*/ 4833 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4834 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4835 if (n==PETSC_DECIDE) { 4836 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4837 } else { 4838 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4839 } 4840 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4841 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4842 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4843 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4844 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4845 4846 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4847 B_mpi->assembled = PETSC_FALSE; 4848 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4849 merge->bi = bi; 4850 merge->bj = bj; 4851 merge->buf_ri = buf_ri; 4852 merge->buf_rj = buf_rj; 4853 merge->coi = NULL; 4854 merge->coj = NULL; 4855 merge->owners_co = NULL; 4856 4857 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4858 4859 /* attach the supporting struct to B_mpi for reuse */ 4860 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4861 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4862 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4863 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4864 *mpimat = B_mpi;
4865
4866 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4867 PetscFunctionReturn(0);
4868 }
4869
4870 #undef __FUNCT__
4871 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4872 /*@C
4873 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4874 matrices from each processor
4875
4876 Collective on MPI_Comm
4877
4878 Input Parameters:
4879 + comm - the communicator the parallel matrix will live on
4880 . seqmat - the input sequential matrix on this process
4881 . m - number of local rows (or PETSC_DECIDE)
4882 . n - number of local columns (or PETSC_DECIDE)
4883 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4884
4885 Output Parameter:
4886 . mpimat - the parallel matrix generated
4887
4888 Level: advanced
4889
4890 Notes:
4891 The dimensions of the sequential matrix on each process MUST be the same.
4892 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4893 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4894 @*/
4895 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4896 {
4897 PetscErrorCode ierr;
4898 PetscMPIInt size;
4899
4900 PetscFunctionBegin;
4901 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4902 if (size == 1) {
4903 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4904 if (scall == MAT_INITIAL_MATRIX) {
4905 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4906 } else {
4907 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4908 }
4909 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4910 PetscFunctionReturn(0);
4911 }
4912 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4913 if (scall == MAT_INITIAL_MATRIX) {
4914 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4915 }
4916 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4917 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4918 PetscFunctionReturn(0);
4919 }
4920
4921 #undef __FUNCT__
4922 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4923 /*@
4924 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4925 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4926 with MatGetSize()
4927
4928 Not Collective
4929
4930 Input Parameters:
4931 + A - the matrix
4932 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4933
4934 Output Parameter:
4935 .
A_loc - the local sequential matrix generated 4936 4937 Level: developer 4938 4939 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4940 4941 @*/ 4942 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4943 { 4944 PetscErrorCode ierr; 4945 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4946 Mat_SeqAIJ *mat,*a,*b; 4947 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4948 MatScalar *aa,*ba,*cam; 4949 PetscScalar *ca; 4950 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4951 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4952 PetscBool match; 4953 4954 PetscFunctionBegin; 4955 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4956 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4957 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4958 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4959 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4960 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4961 aa = a->a; ba = b->a; 4962 if (scall == MAT_INITIAL_MATRIX) { 4963 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4964 ci[0] = 0; 4965 for (i=0; i<am; i++) { 4966 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4967 } 4968 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4969 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4970 k = 0; 4971 for (i=0; i<am; i++) { 4972 ncols_o = bi[i+1] - bi[i]; 4973 ncols_d = ai[i+1] - ai[i]; 4974 /* off-diagonal portion of A */ 4975 for (jo=0; jo<ncols_o; jo++) { 4976 col = cmap[*bj]; 4977 if (col >= cstart) break; 4978 cj[k] = col; bj++; 4979 ca[k++] = *ba++; 4980 } 4981 /* diagonal portion of A */ 4982 for (j=0; j<ncols_d; j++) { 4983 cj[k] = cstart + *aj++; 4984 ca[k++] = *aa++; 4985 } 4986 /* off-diagonal portion of A */ 4987 for (j=jo; j<ncols_o; j++) { 4988 cj[k] = cmap[*bj++]; 4989 ca[k++] = *ba++; 4990 } 4991 } 4992 /* put together the new matrix */ 4993 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4994 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4995 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4996 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4997 mat->free_a = PETSC_TRUE; 4998 mat->free_ij = PETSC_TRUE; 4999 mat->nonew = 0; 5000 } else if (scall == MAT_REUSE_MATRIX) { 5001 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5002 ci = mat->i; cj = mat->j; cam = mat->a; 5003 for (i=0; i<am; i++) { 5004 /* off-diagonal portion of A */ 5005 ncols_o = bi[i+1] - bi[i]; 5006 for (jo=0; jo<ncols_o; jo++) { 5007 col = cmap[*bj]; 5008 if (col >= cstart) break; 5009 *cam++ = *ba++; bj++; 5010 } 5011 /* diagonal portion of A */ 5012 ncols_d = ai[i+1] - ai[i]; 5013 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5014 /* off-diagonal portion of A */ 5015 for (j=jo; j<ncols_o; j++) { 5016 *cam++ = *ba++; bj++; 5017 } 5018 } 5019 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5020 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5021 PetscFunctionReturn(0); 5022 } 5023 5024 #undef __FUNCT__ 5025 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5026 /*@C 5027 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5028 5029 Not Collective 5030 5031 Input Parameters: 5032 + A - the matrix 5033 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5034 - row, col - index sets of rows and columns to extract (or NULL) 5035 5036 Output Parameter: 5037 . 
A_loc - the local sequential matrix generated 5038 5039 Level: developer 5040 5041 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5042 5043 @*/ 5044 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5045 { 5046 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5047 PetscErrorCode ierr; 5048 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5049 IS isrowa,iscola; 5050 Mat *aloc; 5051 PetscBool match; 5052 5053 PetscFunctionBegin; 5054 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5055 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5056 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5057 if (!row) { 5058 start = A->rmap->rstart; end = A->rmap->rend; 5059 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5060 } else { 5061 isrowa = *row; 5062 } 5063 if (!col) { 5064 start = A->cmap->rstart; 5065 cmap = a->garray; 5066 nzA = a->A->cmap->n; 5067 nzB = a->B->cmap->n; 5068 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5069 ncols = 0; 5070 for (i=0; i<nzB; i++) { 5071 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5072 else break; 5073 } 5074 imark = i; 5075 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5076 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5077 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5078 } else { 5079 iscola = *col; 5080 } 5081 if (scall != MAT_INITIAL_MATRIX) { 5082 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5083 aloc[0] = *A_loc; 5084 } 5085 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5086 *A_loc = aloc[0]; 5087 ierr = PetscFree(aloc);CHKERRQ(ierr); 5088 if (!row) { 5089 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5090 } 5091 if (!col) { 5092 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5093 } 5094 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5095 PetscFunctionReturn(0); 5096 } 5097 5098 #undef __FUNCT__ 5099 #define __FUNCT__ "MatGetBrowsOfAcols" 5100 /*@C 5101 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5102 5103 Collective on Mat 5104 5105 Input Parameters: 5106 + A,B - the matrices in mpiaij format 5107 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5108 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5109 5110 Output Parameter: 5111 + rowb, colb - index sets of rows and columns of B to extract 5112 - B_seq - the sequential matrix generated 5113 5114 Level: developer 5115 5116 @*/ 5117 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5118 { 5119 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5120 PetscErrorCode ierr; 5121 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5122 IS isrowb,iscolb; 5123 Mat *bseq=NULL; 5124 5125 PetscFunctionBegin; 5126 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5127 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5128 } 5129 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5130 5131 if (scall == MAT_INITIAL_MATRIX) { 5132 start = A->cmap->rstart; 5133 cmap = a->garray; 5134 nzA = a->A->cmap->n; 5135 nzB = a->B->cmap->n; 5136 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5137 ncols = 0; 5138 for (i=0; i<nzB; i++) { /* row < local row index */ 5139 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5140 else break; 5141 } 5142 imark = i; 5143 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5144 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5145 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5146 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5147 } else { 5148 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5149 isrowb = *rowb; iscolb = *colb; 5150 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5151 bseq[0] = *B_seq; 5152 } 5153 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5154 *B_seq = bseq[0]; 5155 ierr = PetscFree(bseq);CHKERRQ(ierr); 5156 if (!rowb) { 5157 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5158 } else { 5159 *rowb = isrowb; 5160 } 5161 if (!colb) { 5162 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5163 } else { 5164 *colb = iscolb; 5165 } 5166 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5167 PetscFunctionReturn(0); 5168 } 5169 5170 #undef __FUNCT__ 5171 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5172 /* 5173 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5174 of the OFF-DIAGONAL portion of local A 5175 5176 Collective on Mat 5177 5178 Input Parameters: 5179 + A,B - the matrices in mpiaij format 5180 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5181 5182 Output Parameter: 5183 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5184 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5185 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5186 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5187 5188 Level: developer 5189 5190 */ 5191 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5192 { 5193 VecScatter_MPI_General *gen_to,*gen_from; 5194 PetscErrorCode ierr; 5195 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5196 Mat_SeqAIJ *b_oth; 5197 VecScatter ctx =a->Mvctx; 5198 MPI_Comm comm; 5199 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5200 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5201 PetscScalar *rvalues,*svalues; 5202 MatScalar *b_otha,*bufa,*bufA; 5203 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5204 MPI_Request *rwaits = NULL,*swaits = NULL; 5205 MPI_Status *sstatus,rstatus; 5206 PetscMPIInt jj; 5207 PetscInt *cols,sbs,rbs; 5208 PetscScalar *vals; 5209 5210 PetscFunctionBegin; 5211 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5212 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5213 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5214 } 5215 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5216 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5217 5218 gen_to = (VecScatter_MPI_General*)ctx->todata; 5219 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5220 rvalues = gen_from->values; /* holds the length of receiving row */ 5221 svalues = gen_to->values; /* holds the length of sending row */ 5222 nrecvs = gen_from->n; 5223 nsends = gen_to->n; 5224 5225 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5226 srow = gen_to->indices; /* local row index to be sent */ 5227 sstarts = gen_to->starts; 5228 sprocs = gen_to->procs; 5229 sstatus = gen_to->sstatus; 5230 sbs = gen_to->bs; 5231 rstarts = gen_from->starts; 5232 rprocs = gen_from->procs; 5233 rbs = gen_from->bs; 5234 5235 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5236 if (scall == MAT_INITIAL_MATRIX) { 5237 /* i-array */ 5238 /*---------*/ 5239 /* post receives */ 5240 for (i=0; i<nrecvs; i++) { 5241 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5242 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5243 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5244 } 5245 5246 /* pack the outgoing message */ 5247 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5248 5249 sstartsj[0] = 0; 5250 rstartsj[0] = 0; 5251 len = 0; /* total length of j or a array to be sent */ 5252 k = 0; 5253 for (i=0; i<nsends; i++) { 5254 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5255 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5256 for (j=0; j<nrows; j++) { 5257 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5258 for (l=0; l<sbs; l++) { 5259 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5260 5261 rowlen[j*sbs+l] = ncols; 5262 5263 len += ncols; 5264 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5265 } 5266 k++; 5267 } 5268 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5269 5270 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5271 } 5272 /* recvs and 
sends of i-array are completed */ 5273 i = nrecvs; 5274 while (i--) { 5275 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5276 } 5277 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5278 5279 /* allocate buffers for sending j and a arrays */ 5280 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5281 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5282 5283 /* create i-array of B_oth */ 5284 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5285 5286 b_othi[0] = 0; 5287 len = 0; /* total length of j or a array to be received */ 5288 k = 0; 5289 for (i=0; i<nrecvs; i++) { 5290 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5291 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5292 for (j=0; j<nrows; j++) { 5293 b_othi[k+1] = b_othi[k] + rowlen[j]; 5294 len += rowlen[j]; k++; 5295 } 5296 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5297 } 5298 5299 /* allocate space for j and a arrrays of B_oth */ 5300 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5301 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5302 5303 /* j-array */ 5304 /*---------*/ 5305 /* post receives of j-array */ 5306 for (i=0; i<nrecvs; i++) { 5307 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5308 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5309 } 5310 5311 /* pack the outgoing message j-array */ 5312 k = 0; 5313 for (i=0; i<nsends; i++) { 5314 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5315 bufJ = bufj+sstartsj[i]; 5316 for (j=0; j<nrows; j++) { 5317 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5318 for (ll=0; ll<sbs; ll++) { 5319 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5320 for (l=0; l<ncols; l++) { 5321 *bufJ++ = cols[l]; 5322 } 5323 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5324 } 5325 } 5326 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5327 } 5328 5329 /* recvs and sends of j-array are completed */ 5330 i = nrecvs; 5331 while (i--) { 5332 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5333 } 5334 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5335 } else if (scall == MAT_REUSE_MATRIX) { 5336 sstartsj = *startsj_s; 5337 rstartsj = *startsj_r; 5338 bufa = *bufa_ptr; 5339 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5340 b_otha = b_oth->a; 5341 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5342 5343 /* a-array */ 5344 /*---------*/ 5345 /* post receives of a-array */ 5346 for (i=0; i<nrecvs; i++) { 5347 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5348 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5349 } 5350 5351 /* pack the outgoing message a-array */ 5352 k = 0; 5353 for (i=0; i<nsends; i++) { 5354 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5355 bufA = bufa+sstartsj[i]; 5356 for (j=0; j<nrows; j++) { 5357 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5358 for (ll=0; ll<sbs; ll++) { 5359 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5360 for (l=0; l<ncols; l++) { 5361 *bufA++ = vals[l]; 5362 } 5363 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5364 } 5365 } 5366 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5367 } 5368 /* recvs and sends of a-array are completed */ 5369 i = nrecvs; 5370 while (i--) { 5371 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5372 } 5373 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5374 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5375 5376 if (scall == MAT_INITIAL_MATRIX) { 5377 /* put together the new matrix */ 5378 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5379 5380 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5381 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5382 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5383 b_oth->free_a = PETSC_TRUE; 5384 b_oth->free_ij = PETSC_TRUE; 5385 b_oth->nonew = 0; 5386 5387 ierr = PetscFree(bufj);CHKERRQ(ierr); 5388 if (!startsj_s || !bufa_ptr) { 5389 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5390 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5391 } else { 5392 *startsj_s = sstartsj; 5393 *startsj_r = rstartsj; 5394 *bufa_ptr = bufa; 5395 } 5396 } 5397 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5398 PetscFunctionReturn(0); 5399 } 5400 5401 #undef __FUNCT__ 5402 #define __FUNCT__ "MatGetCommunicationStructs" 5403 /*@C 5404 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5405 5406 Not Collective 5407 5408 Input Parameters: 5409 . A - The matrix in mpiaij format 5410 5411 Output Parameter: 5412 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5413 . colmap - A map from global column index to local index into lvec 5414 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5415 5416 Level: developer 5417 5418 @*/ 5419 #if defined(PETSC_USE_CTABLE) 5420 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5421 #else 5422 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5423 #endif 5424 { 5425 Mat_MPIAIJ *a; 5426 5427 PetscFunctionBegin; 5428 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5429 PetscValidPointer(lvec, 2); 5430 PetscValidPointer(colmap, 3); 5431 PetscValidPointer(multScatter, 4); 5432 a = (Mat_MPIAIJ*) A->data; 5433 if (lvec) *lvec = a->lvec; 5434 if (colmap) *colmap = a->colmap; 5435 if (multScatter) *multScatter = a->Mvctx; 5436 PetscFunctionReturn(0); 5437 } 5438 5439 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5440 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5441 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5442 5443 #undef __FUNCT__ 5444 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5445 /* 5446 Computes (B'*A')' since computing B*A directly is untenable 5447 5448 n p p 5449 ( ) ( ) ( ) 5450 m ( A ) * n ( B ) = m ( C ) 5451 ( ) ( ) ( ) 5452 5453 */ 5454 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5455 { 5456 PetscErrorCode ierr; 5457 Mat At,Bt,Ct; 5458 5459 PetscFunctionBegin; 5460 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5461 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5462 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5463 ierr = MatDestroy(&At);CHKERRQ(ierr); 
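/* Ct = Bt*At = (A*B)^T, so transposing Ct into the existing C below yields C = A*B without
   forming the MPIDense*MPIAIJ product directly */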
5464 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5465 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5466 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5467 PetscFunctionReturn(0); 5468 } 5469 5470 #undef __FUNCT__ 5471 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5472 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5473 { 5474 PetscErrorCode ierr; 5475 PetscInt m=A->rmap->n,n=B->cmap->n; 5476 Mat Cmat; 5477 5478 PetscFunctionBegin; 5479 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5480 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5481 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5482 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5483 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5484 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5485 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5486 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5487 5488 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5489 5490 *C = Cmat; 5491 PetscFunctionReturn(0); 5492 } 5493 5494 /* ----------------------------------------------------------------*/ 5495 #undef __FUNCT__ 5496 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5497 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5498 { 5499 PetscErrorCode ierr; 5500 5501 PetscFunctionBegin; 5502 if (scall == MAT_INITIAL_MATRIX) { 5503 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5504 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5505 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5506 } 5507 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5508 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5509 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5510 PetscFunctionReturn(0); 5511 } 5512 5513 #if defined(PETSC_HAVE_MUMPS) 5514 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5515 #endif 5516 #if defined(PETSC_HAVE_PASTIX) 5517 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5518 #endif 5519 #if defined(PETSC_HAVE_SUPERLU_DIST) 5520 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5521 #endif 5522 #if defined(PETSC_HAVE_CLIQUE) 5523 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5524 #endif 5525 5526 /*MC 5527 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5528 5529 Options Database Keys: 5530 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5531 5532 Level: beginner 5533 5534 .seealso: MatCreateAIJ() 5535 M*/ 5536 5537 #undef __FUNCT__ 5538 #define __FUNCT__ "MatCreate_MPIAIJ" 5539 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5540 { 5541 Mat_MPIAIJ *b; 5542 PetscErrorCode ierr; 5543 PetscMPIInt size; 5544 5545 PetscFunctionBegin; 5546 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5547 5548 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5549 B->data = (void*)b; 5550 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5551 B->assembled = PETSC_FALSE; 5552 B->insertmode = NOT_SET_VALUES; 5553 b->size = size; 5554 5555 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5556 5557 /* build cache for off array entries formed */ 5558 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5559 5560 b->donotstash = PETSC_FALSE; 5561 b->colmap = 0; 5562 b->garray = 0; 5563 b->roworiented = PETSC_TRUE; 5564 5565 /* stuff used for matrix vector multiply */ 5566 b->lvec = NULL; 5567 b->Mvctx = NULL; 5568 5569 /* stuff for MatGetRow() */ 5570 b->rowindices = 0; 5571 b->rowvalues = 0; 5572 b->getrowactive = PETSC_FALSE; 5573 5574 /* flexible pointer used in CUSP/CUSPARSE classes */ 5575 b->spptr = NULL; 5576 5577 #if defined(PETSC_HAVE_MUMPS) 5578 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5579 #endif 5580 #if defined(PETSC_HAVE_PASTIX) 5581 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5582 #endif 5583 #if defined(PETSC_HAVE_SUPERLU_DIST) 5584 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5585 #endif 5586 #if defined(PETSC_HAVE_CLIQUE) 5587 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5588 #endif 5589 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5590 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5591 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5593 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5600 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5602 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5603 PetscFunctionReturn(0);
5604 }
5605
5606 #undef __FUNCT__
5607 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5608 /*@
5609 MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5610 and "off-diagonal" part of the matrix in CSR format.
5611
5612 Collective on MPI_Comm
5613
5614 Input Parameters:
5615 + comm - MPI communicator
5616 . m - number of local rows (cannot be PETSC_DECIDE)
5617 . n - number of local columns; this should be the same as the local size used in creating the
5618 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5619 calculated if N is given). For square matrices n is almost always m.
5620 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5621 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5622 . i - row indices for "diagonal" portion of matrix
5623 . j - column indices for "diagonal" portion of matrix
5624 . a - matrix values for "diagonal" portion of matrix
5625 . oi - row indices for "off-diagonal" portion of matrix
5626 . oj - column indices for "off-diagonal" portion of matrix
5627 - oa - matrix values for "off-diagonal" portion of matrix
5628
5629 Output Parameter:
5630 . mat - the matrix
5631
5632 Level: advanced
5633
5634 Notes:
5635 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5636 must free the arrays once the matrix has been destroyed and not before.
5637
5638 The i and j indices are 0 based.
5639
5640 See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portion of the matrix.
5641
5642 This sets local rows and cannot be used to set off-processor values.
5643
5644 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5645 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5646 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5647 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5648 keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5649 communication if it is known that only local entries will be set.
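As an illustrative sketch (not part of the original manual page): following the two MatCreateSeqAIJWithArrays()
calls below, the j array holds local column indices (0 to n-1) for the "diagonal" block while oj holds global
column indices for the "off-diagonal" block. Assume two processes and the 4-by-4 global matrix
.vb
1 0 2 0
0 3 0 4
5 0 6 0
0 0 7 8
.ve
with m = n = 2 on each process, so process 0 owns rows 0-1 and "diagonal" columns 0-1. Process 0 could then call
.vb
Mat         mat;
PetscInt    i[]  = {0,1,2}, j[]  = {0,1}; PetscScalar a[]  = {1.0,3.0};  /* "diagonal" block, local column indices */
PetscInt    oi[] = {0,1,2}, oj[] = {2,3}; PetscScalar oa[] = {2.0,4.0};  /* "off-diagonal" block, global column indices */
MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&mat);
.ve
while process 1 (rows 2-3, "diagonal" columns 2-3) would pass i[] = {0,1,3}, j[] = {0,0,1}, a[] = {6.0,7.0,8.0}
together with oi[] = {0,1,1}, oj[] = {0}, oa[] = {5.0} in the same call.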
5650 5651 .keywords: matrix, aij, compressed row, sparse, parallel 5652 5653 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5654 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5655 @*/ 5656 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5657 { 5658 PetscErrorCode ierr; 5659 Mat_MPIAIJ *maij; 5660 5661 PetscFunctionBegin; 5662 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5663 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5664 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5665 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5666 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5667 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5668 maij = (Mat_MPIAIJ*) (*mat)->data; 5669 5670 (*mat)->preallocated = PETSC_TRUE; 5671 5672 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5673 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5674 5675 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5676 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5677 5678 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5679 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5680 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5681 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5682 5683 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5684 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5685 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5686 PetscFunctionReturn(0); 5687 } 5688 5689 /* 5690 Special version for direct calls from Fortran 5691 */ 5692 #include <petsc-private/fortranimpl.h> 5693 5694 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5695 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5696 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5697 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5698 #endif 5699 5700 /* Change these macros so can be used in void function */ 5701 #undef CHKERRQ 5702 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5703 #undef SETERRQ2 5704 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5705 #undef SETERRQ3 5706 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5707 #undef SETERRQ 5708 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5709 5710 #undef __FUNCT__ 5711 #define __FUNCT__ "matsetvaluesmpiaij_" 5712 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5713 { 5714 Mat mat = *mmat; 5715 PetscInt m = *mm, n = *mn; 5716 InsertMode addv = *maddv; 5717 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5718 PetscScalar value; 5719 PetscErrorCode ierr; 5720 5721 MatCheckPreallocated(mat,1); 5722 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5723 5724 #if defined(PETSC_USE_DEBUG) 5725 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5726 #endif 5727 { 5728 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5729 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5730 PetscBool roworiented = aij->roworiented; 5731 5732 /* Some Variables required in the macro */ 5733 Mat A = aij->A; 5734 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5735 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5736 MatScalar *aa = a->a; 5737 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5738 Mat B = aij->B; 5739 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5740 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5741 MatScalar *ba = b->a; 5742 5743 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5744 PetscInt nonew = a->nonew; 5745 MatScalar *ap1,*ap2; 5746 5747 PetscFunctionBegin; 5748 for (i=0; i<m; i++) { 5749 if (im[i] < 0) continue; 5750 #if defined(PETSC_USE_DEBUG) 5751 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5752 #endif 5753 if (im[i] >= rstart && im[i] < rend) { 5754 row = im[i] - rstart; 5755 lastcol1 = -1; 5756 rp1 = aj + ai[row]; 5757 ap1 = aa + ai[row]; 5758 rmax1 = aimax[row]; 5759 nrow1 = ailen[row]; 5760 low1 = 0; 5761 high1 = nrow1; 5762 lastcol2 = -1; 5763 rp2 = bj + bi[row]; 5764 ap2 = ba + bi[row]; 5765 rmax2 = bimax[row]; 5766 nrow2 = bilen[row]; 5767 low2 = 0; 5768 high2 = nrow2; 5769 5770 for (j=0; j<n; j++) { 5771 if (roworiented) value = v[i*n+j]; 5772 else value = v[i+j*m]; 5773 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5774 if (in[j] >= cstart && in[j] < cend) { 5775 col = in[j] - cstart; 5776 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5777 } else if (in[j] < 0) continue; 5778 #if defined(PETSC_USE_DEBUG) 5779 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5780 #endif 5781 else { 5782 if (mat->was_assembled) { 5783 if (!aij->colmap) { 5784 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5785 } 5786 #if defined(PETSC_USE_CTABLE) 5787 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5788 col--; 5789 #else 5790 col = aij->colmap[in[j]] - 1; 5791 #endif 5792 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5793 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5794 col = in[j]; 5795 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5796 B = aij->B; 5797 b = (Mat_SeqAIJ*)B->data; 5798 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5799 rp2 = bj + bi[row]; 5800 ap2 = ba + bi[row]; 5801 rmax2 = bimax[row]; 5802 nrow2 = bilen[row]; 5803 low2 = 0; 5804 high2 = nrow2; 5805 bm = aij->B->rmap->n; 5806 ba = b->a; 5807 } 5808 } else col = in[j]; 5809 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5810 } 5811 } 5812 } else if (!aij->donotstash) { 5813 if (roworiented) { 5814 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5815 } else { 5816 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5817 } 5818 } 5819 } 5820 } 5821 PetscFunctionReturnVoid(); 5822 } 5823 5824