#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
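
/*
   Usage sketch (not part of the library): following the recommendation in the
   man page above, call both preallocation routines so the same code runs
   unchanged on one or many processes.  The names comm and n and the row
   estimates 5 and 2 are made up for illustration.

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          used when the communicator has 1 process
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   used when it has more than 1

   Whichever routine does not match the actual matrix type is a no-op.
*/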

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
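
/*
   Usage sketch for the routine above, through the public interface
   MatGetColumnNorms(): the norms array must have room for the GLOBAL number
   of columns on every process, since each process accumulates its local
   contribution into work[] and the results are combined with MPI_Allreduce().

      PetscReal *norms;
      PetscInt  N;
      ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
      ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
      ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
      ierr = PetscFree(norms);CHKERRQ(ierr);
*/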

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
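
/*
   Illustration (sketch) of the lookup this colmap supports, as used by
   MatSetValues_MPIAIJ() below.  Entries are stored shifted by one so that a
   return of 0 (array variant) or a table miss can be distinguished from
   column 0; gcol and lcol are hypothetical names for a global and a local
   column index.

      #if defined(PETSC_USE_CTABLE)
        ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
        lcol--;                                 lcol == -1 means "not present"
      #else
        lcol = aij->colmap[gcol] - 1;
      #endif
*/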

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
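
/*
   Typical calling sequence (sketch) for the routine above: values for locally
   owned rows go directly into the diagonal block aij->A or the off-diagonal
   block aij->B; values for rows owned by other processes are cached in
   mat->stash and communicated by MatAssemblyBegin/End_MPIAIJ() below.

      ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/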

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
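
/*
   Caller's note (sketch): as the error branch in the routine above indicates,
   only locally owned rows can be queried, so portable code guards with the
   ownership range; row, col, and v are hypothetical names here.

      PetscInt rstart,rend;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      if (row >= rstart && row < rend) {
        ierr = MatGetValues(mat,1,&row,1,&col,&v);CHKERRQ(ierr);
      }
*/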

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscMPIInt    size    = mat->size;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0, 0);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
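
/*
   Usage sketch for the routine above: enforcing Dirichlet rows.  rows[] holds
   GLOBAL row numbers (any process may list any row); passing x and b as well
   makes the right-hand side consistent, b_row = diag * x_row, for each zeroed
   row.  nrows, rows, x, and b are hypothetical names.

      ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/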

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
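
/*
   Companion sketch: the routine above also eliminates the COLUMNS, using the
   scattered mask to subtract the known values from the right-hand side,
   b_i -= a_ij * x_j for each eliminated column j, which keeps a symmetric
   matrix symmetric.  nrows, rows, x, and b are hypothetical names.

      ierr = MatZeroRowsColumns(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/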

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
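
/*
   MatMult_MPIAIJ() and MatMultAdd_MPIAIJ() above both compute the split product

      y = A_d * x_local + A_o * x_ghost

   where A_d (a->A) is the local diagonal block and A_o (a->B) the off-diagonal
   block stored with a compressed column space.  The scatter that fills the
   ghost values a->lvec is started before the local product and finished after
   it, so communication overlaps the local computation.
*/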

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the root process needs as much space as the largest amount any process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
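
/*
   For reference, the layout written by the routine above (a sketch of what
   the matching loader expects to read back):

      header[0]  MAT_FILE_CLASSID
      header[1]  number of global rows M
      header[2]  number of global columns N
      header[3]  total number of nonzeros (summed over all processes)
      M   PetscInt     row lengths
      nz  PetscInt     column indices, row by row, each row in ascending
                       global order (off-diagonal and diagonal interleaved)
      nz  PetscScalar  numerical values in the same order
*/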

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
      if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
    }

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
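/*
   Usage sketch (not part of the implementation): the local sweeps above are
   normally reached through PCSOR; "ksp" is assumed to be an existing KSP whose
   operator is an MPIAIJ matrix.

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetOmega(pc,1.5);CHKERRQ(ierr);

   Equivalently: -pc_type sor -pc_sor_omega 1.5. Each iteration scatters the
   current iterate into lvec, forms bb1 = bb - B*x to account for off-process
   coupling, and then sweeps only on the local diagonal block A.
*/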
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
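/*
   Usage sketch (not part of the implementation): permuting a distributed matrix.
   rowp and colp must be parallel permutations of 0..M-1 and 0..N-1; the index
   arrays rowindices/colindices below are illustrative.

     IS  rowp,colp;
     Mat Aperm;
     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),m,rowindices,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,colindices,PETSC_COPY_VALUES,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
*/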
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
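/*
   Usage sketch (not part of the implementation): options set on the parallel
   matrix are forwarded to the local diagonal/off-diagonal blocks where relevant,
   as coded above; MAT_SPD additionally marks the matrix (structurally) symmetric.

     ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/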
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
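/*
   Usage sketch (not part of the implementation): iterating over the locally
   owned rows; as enforced above, only rows in [rstart,rend) may be requested.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...use the global column indices cols[] and values vals[]...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/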
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
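/*
   Usage sketch (not part of the implementation): the norms supported above;
   NORM_2 of a matrix is not implemented, per the error branch.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);   sqrt of sum of squares, via Allreduce
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);           max column sum
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);    max row sum
*/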
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data,*Bloc = (Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
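/*
   Usage sketch (not part of the implementation): the two transpose modes
   handled above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);   in-place, square A only
*/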
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
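/*
   Usage sketch (not part of the implementation): A <- diag(l) A diag(r). The
   left vector must match the row layout and the right vector the column layout;
   either may be NULL to skip that side. MatGetVecs() provides conforming vectors.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);   r: column layout, l: row layout
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/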
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, a more efficient copy than
       MatCopy_Basic() could be provided by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIAIJ"
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt       i,m=Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
  const PetscInt *xi = x->i,*yi = y->i;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
    const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
      for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}
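/*
   The loop above is a sorted-merge count: for each row it walks the column lists
   of X and Y (mapped to global indices) in tandem and counts the size of their
   union. A minimal standalone sketch of the same idea, with hypothetical sorted
   global index arrays xcols/ycols of lengths nzx/nzy:

     PetscInt j = 0,k = 0,count = 0;
     while (j < nzx && k < nzy) {
       if (xcols[j] == ycols[k]) {j++; k++;}   duplicate entry, counted once
       else if (xcols[j] < ycols[k]) j++;
       else k++;
       count++;
     }
     count += (nzx - j) + (nzy - k);           leftovers from either list
*/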
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
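/*
   Usage sketch (not part of the implementation): Y <- Y + a*X. The structure
   flag selects the branch above: SAME_NONZERO_PATTERN does a direct BLAS axpy
   on the stored values, SUBSET_NONZERO_PATTERN maps X's entries into Y via the
   xtoy index, and DIFFERENT_NONZERO_PATTERN preallocates and rebuilds Y.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/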
extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PBGL)

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
    This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
    This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type    *lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b, x);CHKERRQ(ierr);

  PetscScalar *array_x;
  ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x, &sx);CHKERRQ(ierr);

  PetscScalar *array_b;
  ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b, &sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
  array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]);
  array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
                                       boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph));
  gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph));

  ilu_set_solve(*lgraph_p, vector_b, vector_x);
  PetscFunctionReturn(0);
}
#endif
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    redund = a->redundant;
  } else {
    Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
    redund = a->redundant;
  }
  if (redund) {
    if (redund->matseq) { /* via MatGetSubMatrices() */
      ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
      ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
      ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
    } else {
      ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
      for (i=0; i<redund->nrecvs; i++) {
        ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
        ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
    }

    if (redund->psubcomm) {
      ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
    }
    ierr = redund->Destroy(A);CHKERRQ(ierr);
    ierr = PetscFree(redund);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Wrong nzlocal"); 2437 2438 nsends = redund->nsends; 2439 nrecvs = redund->nrecvs; 2440 send_rank = redund->send_rank; 2441 recv_rank = redund->recv_rank; 2442 sbuf_nz = redund->sbuf_nz; 2443 rbuf_nz = redund->rbuf_nz; 2444 sbuf_j = redund->sbuf_j; 2445 sbuf_a = redund->sbuf_a; 2446 rbuf_j = redund->rbuf_j; 2447 rbuf_a = redund->rbuf_a; 2448 } 2449 2450 if (reuse == MAT_INITIAL_MATRIX) { 2451 PetscInt nleftover,np_subcomm; 2452 2453 /* get the destination processors' id send_rank, nsends and nrecvs */ 2454 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2455 2456 np_subcomm = size/nsubcomm; 2457 nleftover = size - nsubcomm*np_subcomm; 2458 2459 /* block of codes below is specific for INTERLACED */ 2460 /* ------------------------------------------------*/ 2461 nsends = 0; nrecvs = 0; 2462 for (i=0; i<size; i++) { 2463 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2464 send_rank[nsends++] = i; 2465 recv_rank[nrecvs++] = i; 2466 } 2467 } 2468 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2469 i = size-nleftover-1; 2470 j = 0; 2471 while (j < nsubcomm - nleftover) { 2472 send_rank[nsends++] = i; 2473 i--; j++; 2474 } 2475 } 2476 2477 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2478 for (i=0; i<nleftover; i++) { 2479 recv_rank[nrecvs++] = size-nleftover+i; 2480 } 2481 } 2482 /*----------------------------------------------*/ 2483 2484 /* allocate sbuf_j, sbuf_a */ 2485 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2486 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2487 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2488 /* 2489 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2490 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2491 */ 2492 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2493 2494 /* copy mat's local entries into the buffers */ 2495 if (reuse == MAT_INITIAL_MATRIX) { 2496 rownz_max = 0; 2497 rptr = sbuf_j; 2498 cols = sbuf_j + rend-rstart + 1; 2499 vals = sbuf_a; 2500 rptr[0] = 0; 2501 for (i=0; i<rend-rstart; i++) { 2502 row = i + rstart; 2503 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2504 ncols = nzA + nzB; 2505 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2506 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2507 /* load the column indices for this row into cols */ 2508 lwrite = 0; 2509 for (l=0; l<nzB; l++) { 2510 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2511 vals[lwrite] = aworkB[l]; 2512 cols[lwrite++] = ctmp; 2513 } 2514 } 2515 for (l=0; l<nzA; l++) { 2516 vals[lwrite] = aworkA[l]; 2517 cols[lwrite++] = cstart + cworkA[l]; 2518 } 2519 for (l=0; l<nzB; l++) { 2520 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2521 vals[lwrite] = aworkB[l]; 2522 cols[lwrite++] = ctmp; 2523 } 2524 } 2525 vals += ncols; 2526 cols += ncols; 2527 rptr[i+1] = rptr[i] + ncols; 2528 if (rownz_max < ncols) rownz_max = ncols; 2529 } 2530 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2531 } else { /* only copy matrix values into sbuf_a */ 2532 rptr = sbuf_j; 2533 vals = sbuf_a; 2534 rptr[0] = 0; 2535 for (i=0; i<rend-rstart; i++) { 2536 row = i + rstart; 2537 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2538 ncols = nzA + nzB; 2539 cworkB = b->j + b->i[i]; 2540 aworkA = a->a + a->i[i]; 2541 aworkB = b->a + b->i[i]; 2542 lwrite = 0; 2543 for (l=0; 
l<nzB; l++) { 2544 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2545 } 2546 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2547 for (l=0; l<nzB; l++) { 2548 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2549 } 2550 vals += ncols; 2551 rptr[i+1] = rptr[i] + ncols; 2552 } 2553 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2554 2555 /* send nzlocal to others, and recv other's nzlocal */ 2556 /*--------------------------------------------------*/ 2557 if (reuse == MAT_INITIAL_MATRIX) { 2558 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2559 2560 s_waits2 = s_waits3 + nsends; 2561 s_waits1 = s_waits2 + nsends; 2562 r_waits1 = s_waits1 + nsends; 2563 r_waits2 = r_waits1 + nrecvs; 2564 r_waits3 = r_waits2 + nrecvs; 2565 } else { 2566 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2567 2568 r_waits3 = s_waits3 + nsends; 2569 } 2570 2571 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2572 if (reuse == MAT_INITIAL_MATRIX) { 2573 /* get new tags to keep the communication clean */ 2574 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2575 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2576 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2577 2578 /* post receives of other's nzlocal */ 2579 for (i=0; i<nrecvs; i++) { 2580 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2581 } 2582 /* send nzlocal to others */ 2583 for (i=0; i<nsends; i++) { 2584 sbuf_nz[i] = nzlocal; 2585 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2586 } 2587 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2588 count = nrecvs; 2589 while (count) { 2590 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2591 2592 recv_rank[imdex] = recv_status.MPI_SOURCE; 2593 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2594 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2595 2596 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2597 2598 rbuf_nz[imdex] += i + 2; 2599 2600 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2601 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2602 count--; 2603 } 2604 /* wait on sends of nzlocal */ 2605 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2606 /* send mat->i,j to others, and recv from other's */ 2607 /*------------------------------------------------*/ 2608 for (i=0; i<nsends; i++) { 2609 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2610 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2611 } 2612 /* wait on receives of mat->i,j */ 2613 /*------------------------------*/ 2614 count = nrecvs; 2615 while (count) { 2616 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2617 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2618 count--; 2619 } 2620 /* wait on sends of mat->i,j */ 2621 /*---------------------------*/ 2622 if (nsends) { 2623 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2624 } 2625 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2626 2627 /* post 
  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++) {
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt *range;
    PetscInt       rstart_sub,rend_sub,mloc_sub;

    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++) {
      j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++) {
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);

    /* get local size of redundant matrix
       - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
    ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
    rstart_sub = range[nsubcomm*subrank];
    if (subrank+1 < subsize) { /* not the last proc in subcomm */
      rend_sub = range[nsubcomm*(subrank+1)];
    } else {
      rend_sub = mat->rmap->N;
    }
    mloc_sub = rend_sub - rstart_sub;

    if (M == N) {
      ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    } else { /* non-square matrix */
      ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
    }
    ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++) {
    row   = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++) {
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
    rptr = rbuf_j[imdex];
    cols = rbuf_j[imdex] + rend-rstart + 1;
    vals = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++) {
      row   = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    *matredundant = C;

    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
      c->redundant = redund;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
      c->redundant = redund;
    }

    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->recv_rank = recv_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;
    redund->psubcomm  = NULL;

    redund->Destroy = C->ops->destroy;
    C->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}
MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2813 rstart = rend - mloc_sub; 2814 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2815 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2816 } else { /* reuse == MAT_REUSE_MATRIX */ 2817 if (subsize == 1) { 2818 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2819 redund = c->redundant; 2820 } else { 2821 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2822 redund = c->redundant; 2823 } 2824 2825 isrow = redund->isrow; 2826 iscol = redund->iscol; 2827 matseq = redund->matseq; 2828 } 2829 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2830 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2831 2832 if (reuse == MAT_INITIAL_MATRIX) { 2833 /* create a supporting struct and attach it to C for reuse */ 2834 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2835 if (subsize == 1) { 2836 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2837 c->redundant = redund; 2838 } else { 2839 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2840 c->redundant = redund; 2841 } 2842 redund->isrow = isrow; 2843 redund->iscol = iscol; 2844 redund->matseq = matseq; 2845 redund->psubcomm = psubcomm; 2846 redund->Destroy = (*matredundant)->ops->destroy; 2847 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2848 } 2849 PetscFunctionReturn(0); 2850 } 2851 2852 #undef __FUNCT__ 2853 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2854 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2855 { 2856 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2857 PetscErrorCode ierr; 2858 PetscInt i,*idxb = 0; 2859 PetscScalar *va,*vb; 2860 Vec vtmp; 2861 2862 PetscFunctionBegin; 2863 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2864 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2865 if (idx) { 2866 for (i=0; i<A->rmap->n; i++) { 2867 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2868 } 2869 } 2870 2871 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2872 if (idx) { 2873 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2874 } 2875 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2876 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2877 2878 for (i=0; i<A->rmap->n; i++) { 2879 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2880 va[i] = vb[i]; 2881 if (idx) idx[i] = a->garray[idxb[i]]; 2882 } 2883 } 2884 2885 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2886 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2887 ierr = PetscFree(idxb);CHKERRQ(ierr); 2888 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2889 PetscFunctionReturn(0); 2890 } 2891 2892 #undef __FUNCT__ 2893 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2894 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2895 { 2896 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2897 PetscErrorCode ierr; 2898 PetscInt i,*idxb = 0; 2899 PetscScalar *va,*vb; 2900 Vec vtmp; 2901 2902 PetscFunctionBegin; 2903 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2904 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2905 if (idx) { 2906 for (i=0; i<A->cmap->n; i++) { 2907 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2908 } 2909 } 2910 2911 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2912 if (idx) { 2913 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2914 } 2915 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2916 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2917 2918 for (i=0; i<A->rmap->n; i++) 
{ 2919 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2920 va[i] = vb[i]; 2921 if (idx) idx[i] = a->garray[idxb[i]]; 2922 } 2923 } 2924 2925 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2926 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2927 ierr = PetscFree(idxb);CHKERRQ(ierr); 2928 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2929 PetscFunctionReturn(0); 2930 } 2931 2932 #undef __FUNCT__ 2933 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2934 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2935 { 2936 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2937 PetscInt n = A->rmap->n; 2938 PetscInt cstart = A->cmap->rstart; 2939 PetscInt *cmap = mat->garray; 2940 PetscInt *diagIdx, *offdiagIdx; 2941 Vec diagV, offdiagV; 2942 PetscScalar *a, *diagA, *offdiagA; 2943 PetscInt r; 2944 PetscErrorCode ierr; 2945 2946 PetscFunctionBegin; 2947 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2948 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2949 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2950 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2951 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2952 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2953 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2954 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2955 for (r = 0; r < n; ++r) { 2956 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2957 a[r] = diagA[r]; 2958 idx[r] = cstart + diagIdx[r]; 2959 } else { 2960 a[r] = offdiagA[r]; 2961 idx[r] = cmap[offdiagIdx[r]]; 2962 } 2963 } 2964 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2965 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2966 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2967 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2968 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2969 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2975 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2976 { 2977 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2978 PetscInt n = A->rmap->n; 2979 PetscInt cstart = A->cmap->rstart; 2980 PetscInt *cmap = mat->garray; 2981 PetscInt *diagIdx, *offdiagIdx; 2982 Vec diagV, offdiagV; 2983 PetscScalar *a, *diagA, *offdiagA; 2984 PetscInt r; 2985 PetscErrorCode ierr; 2986 2987 PetscFunctionBegin; 2988 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2989 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2990 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2991 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2992 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2993 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2994 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2995 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2996 for (r = 0; r < n; ++r) { 2997 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2998 a[r] = diagA[r]; 2999 idx[r] = cstart + diagIdx[r]; 3000 } else { 3001 a[r] = offdiagA[r]; 3002 idx[r] = cmap[offdiagIdx[r]]; 3003 } 3004 } 3005 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3006 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3007 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3008 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3009 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3010 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3011
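/* Usage sketch (illustrative only, not taken from this file): a caller passes a
   vector with the matrix's row layout plus an optional index array, e.g.

       Vec      rmax;
       PetscInt m,*ridx;
       ierr = MatGetVecs(A,NULL,&rmax);CHKERRQ(ierr);   /* MatCreateVecs() in later releases */
       ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
       ierr = PetscMalloc1(m,&ridx);CHKERRQ(ierr);
       ierr = MatGetRowMax(A,rmax,ridx);CHKERRQ(ierr);

   after which rmax holds each local row's maximum entry and ridx[] the global
   column where it occurs. */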
PetscFunctionReturn(0); 3012 } 3013 3014 #undef __FUNCT__ 3015 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3016 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3017 { 3018 PetscErrorCode ierr; 3019 Mat *dummy; 3020 3021 PetscFunctionBegin; 3022 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3023 *newmat = *dummy; 3024 ierr = PetscFree(dummy);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 #undef __FUNCT__ 3029 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3030 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3031 { 3032 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3033 PetscErrorCode ierr; 3034 3035 PetscFunctionBegin; 3036 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3037 PetscFunctionReturn(0); 3038 } 3039 3040 #undef __FUNCT__ 3041 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3042 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3043 { 3044 PetscErrorCode ierr; 3045 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3046 3047 PetscFunctionBegin; 3048 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3049 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3050 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3051 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 3055 /* -------------------------------------------------------------------*/ 3056 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3057 MatGetRow_MPIAIJ, 3058 MatRestoreRow_MPIAIJ, 3059 MatMult_MPIAIJ, 3060 /* 4*/ MatMultAdd_MPIAIJ, 3061 MatMultTranspose_MPIAIJ, 3062 MatMultTransposeAdd_MPIAIJ, 3063 #if defined(PETSC_HAVE_PBGL) 3064 MatSolve_MPIAIJ, 3065 #else 3066 0, 3067 #endif 3068 0, 3069 0, 3070 /*10*/ 0, 3071 0, 3072 0, 3073 MatSOR_MPIAIJ, 3074 MatTranspose_MPIAIJ, 3075 /*15*/ MatGetInfo_MPIAIJ, 3076 MatEqual_MPIAIJ, 3077 MatGetDiagonal_MPIAIJ, 3078 MatDiagonalScale_MPIAIJ, 3079 MatNorm_MPIAIJ, 3080 /*20*/ MatAssemblyBegin_MPIAIJ, 3081 MatAssemblyEnd_MPIAIJ, 3082 MatSetOption_MPIAIJ, 3083 MatZeroEntries_MPIAIJ, 3084 /*24*/ MatZeroRows_MPIAIJ, 3085 0, 3086 #if defined(PETSC_HAVE_PBGL) 3087 0, 3088 #else 3089 0, 3090 #endif 3091 0, 3092 0, 3093 /*29*/ MatSetUp_MPIAIJ, 3094 #if defined(PETSC_HAVE_PBGL) 3095 0, 3096 #else 3097 0, 3098 #endif 3099 0, 3100 0, 3101 0, 3102 /*34*/ MatDuplicate_MPIAIJ, 3103 0, 3104 0, 3105 0, 3106 0, 3107 /*39*/ MatAXPY_MPIAIJ, 3108 MatGetSubMatrices_MPIAIJ, 3109 MatIncreaseOverlap_MPIAIJ, 3110 MatGetValues_MPIAIJ, 3111 MatCopy_MPIAIJ, 3112 /*44*/ MatGetRowMax_MPIAIJ, 3113 MatScale_MPIAIJ, 3114 0, 3115 0, 3116 MatZeroRowsColumns_MPIAIJ, 3117 /*49*/ MatSetRandom_MPIAIJ, 3118 0, 3119 0, 3120 0, 3121 0, 3122 /*54*/ MatFDColoringCreate_MPIXAIJ, 3123 0, 3124 MatSetUnfactored_MPIAIJ, 3125 MatPermute_MPIAIJ, 3126 0, 3127 /*59*/ MatGetSubMatrix_MPIAIJ, 3128 MatDestroy_MPIAIJ, 3129 MatView_MPIAIJ, 3130 0, 3131 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3132 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3133 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3134 0, 3135 0, 3136 0, 3137 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3138 MatGetRowMinAbs_MPIAIJ, 3139 0, 3140 MatSetColoring_MPIAIJ, 3141 0, 3142 MatSetValuesAdifor_MPIAIJ, 3143 /*75*/ MatFDColoringApply_AIJ, 3144 0, 3145 0, 3146 0, 3147 MatFindZeroDiagonals_MPIAIJ, 3148 /*80*/ 0, 3149 0, 3150 0, 3151 /*83*/ MatLoad_MPIAIJ, 3152 0, 3153 0, 3154 0, 3155 0, 3156 0, 3157 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3158 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3159 
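                                       /* note: the numbered markers in this table index slots of
                                          struct _MatOps (see petsc-private/matimpl.h); a 0 entry
                                          means MPIAIJ supplies no implementation for that slot, so
                                          the corresponding MatXXX() call errors out unless a
                                          type-independent fallback exists */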
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3160 MatPtAP_MPIAIJ_MPIAIJ, 3161 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3162 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3163 0, 3164 0, 3165 0, 3166 0, 3167 /*99*/ 0, 3168 0, 3169 0, 3170 MatConjugate_MPIAIJ, 3171 0, 3172 /*104*/MatSetValuesRow_MPIAIJ, 3173 MatRealPart_MPIAIJ, 3174 MatImaginaryPart_MPIAIJ, 3175 0, 3176 0, 3177 /*109*/0, 3178 MatGetRedundantMatrix_MPIAIJ, 3179 MatGetRowMin_MPIAIJ, 3180 0, 3181 0, 3182 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3183 0, 3184 0, 3185 0, 3186 0, 3187 /*119*/0, 3188 0, 3189 0, 3190 0, 3191 MatGetMultiProcBlock_MPIAIJ, 3192 /*124*/MatFindNonzeroRows_MPIAIJ, 3193 MatGetColumnNorms_MPIAIJ, 3194 MatInvertBlockDiagonal_MPIAIJ, 3195 0, 3196 MatGetSubMatricesParallel_MPIAIJ, 3197 /*129*/0, 3198 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3199 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3200 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3201 0, 3202 /*134*/0, 3203 0, 3204 0, 3205 0, 3206 0, 3207 /*139*/0, 3208 0, 3209 0, 3210 MatFDColoringSetUp_MPIXAIJ 3211 }; 3212 3213 /* ----------------------------------------------------------------------------------------*/ 3214 3215 #undef __FUNCT__ 3216 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3217 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3218 { 3219 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3220 PetscErrorCode ierr; 3221 3222 PetscFunctionBegin; 3223 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3224 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3225 PetscFunctionReturn(0); 3226 } 3227 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3230 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3231 { 3232 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3233 PetscErrorCode ierr; 3234 3235 PetscFunctionBegin; 3236 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3237 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3238 PetscFunctionReturn(0); 3239 } 3240 3241 #undef __FUNCT__ 3242 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3243 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3244 { 3245 Mat_MPIAIJ *b; 3246 PetscErrorCode ierr; 3247 3248 PetscFunctionBegin; 3249 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3250 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3251 b = (Mat_MPIAIJ*)B->data; 3252 3253 if (!B->preallocated) { 3254 /* Explicitly create 2 MATSEQAIJ matrices. 
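       b->A holds the "diagonal" block (local rows by local columns) and b->B the
       "off-diagonal" block. Note that B is created below with the FULL global
       column width; its columns are compacted to just the needed ghost columns
       (recorded in garray) later, during assembly (see MatSetUpMultiply_MPIAIJ()).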
*/ 3255 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3256 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3257 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3258 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3259 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3260 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3261 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3262 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3263 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3264 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3265 } 3266 3267 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3268 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3269 B->preallocated = PETSC_TRUE; 3270 PetscFunctionReturn(0); 3271 } 3272 3273 #undef __FUNCT__ 3274 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3275 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3276 { 3277 Mat mat; 3278 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3279 PetscErrorCode ierr; 3280 3281 PetscFunctionBegin; 3282 *newmat = 0; 3283 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3284 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3285 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3286 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3287 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3288 a = (Mat_MPIAIJ*)mat->data; 3289 3290 mat->factortype = matin->factortype; 3291 mat->assembled = PETSC_TRUE; 3292 mat->insertmode = NOT_SET_VALUES; 3293 mat->preallocated = PETSC_TRUE; 3294 3295 a->size = oldmat->size; 3296 a->rank = oldmat->rank; 3297 a->donotstash = oldmat->donotstash; 3298 a->roworiented = oldmat->roworiented; 3299 a->rowindices = 0; 3300 a->rowvalues = 0; 3301 a->getrowactive = PETSC_FALSE; 3302 3303 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3304 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3305 3306 if (oldmat->colmap) { 3307 #if defined(PETSC_USE_CTABLE) 3308 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3309 #else 3310 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3311 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3312 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3313 #endif 3314 } else a->colmap = 0; 3315 if (oldmat->garray) { 3316 PetscInt len; 3317 len = oldmat->B->cmap->n; 3318 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3319 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3320 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3321 } else a->garray = 0; 3322 3323 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3324 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3325 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3326 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3327 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3328 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3329 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3330 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3331 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3332 *newmat = mat; 3333 PetscFunctionReturn(0); 3334 } 3335 3336 3337 3338 #undef __FUNCT__ 3339 #define __FUNCT__ "MatLoad_MPIAIJ" 3340 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3341 { 3342 PetscScalar *vals,*svals; 3343 MPI_Comm comm; 3344 PetscErrorCode ierr; 3345 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3346 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3347 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3348 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3349 PetscInt cend,cstart,n,*rowners,sizesset=1; 3350 int fd; 3351 PetscInt bs = 1; 3352 3353 PetscFunctionBegin; 3354 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3355 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3356 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3357 if (!rank) { 3358 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3359 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3360 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3361 } 3362 3363 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3364 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3365 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3366 3367 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3368 3369 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3370 M = header[1]; N = header[2]; 3371 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3372 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3373 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3374 3375 /* If global sizes are set, check if they are consistent with that given in the file */ 3376 if (sizesset) { 3377 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3378 } 3379 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3380 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3381 3382 /* determine ownership of all (block) rows */ 3383 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3384 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3385 else m = newMat->rmap->n; /* Set by user */ 3386 3387 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3388 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3389 3390 /* First process needs enough room for process with most rows */ 3391 if (!rank) { 3392 mmax = rowners[1]; 3393 for (i=2; i<=size; i++) { 3394 mmax = PetscMax(mmax, rowners[i]); 3395 } 3396 } else mmax = -1; /* unused, but compilers complain */ 3397 3398 rowners[0] = 0; 3399 for (i=2; i<=size; i++) { 3400 rowners[i] += rowners[i-1]; 3401 } 3402 rstart = rowners[rank]; 3403 rend = rowners[rank+1]; 3404 3405 /* distribute row lengths to all processors */ 3406 ierr = 
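/* ourlens[] will hold each local row's total nonzero count as read from the
   file; offlens[] is filled in later with the count of entries falling outside
   this process's column range, so that the pair can serve as the (d_nnz,o_nnz)
   arguments of MatMPIAIJSetPreallocation() below */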
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3407 if (!rank) { 3408 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3409 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3410 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3411 for (j=0; j<m; j++) { 3412 procsnz[0] += ourlens[j]; 3413 } 3414 for (i=1; i<size; i++) { 3415 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3416 /* calculate the number of nonzeros on each processor */ 3417 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3418 procsnz[i] += rowlengths[j]; 3419 } 3420 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3421 } 3422 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3423 } else { 3424 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3425 } 3426 3427 if (!rank) { 3428 /* determine max buffer needed and allocate it */ 3429 maxnz = 0; 3430 for (i=0; i<size; i++) { 3431 maxnz = PetscMax(maxnz,procsnz[i]); 3432 } 3433 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3434 3435 /* read in my part of the matrix column indices */ 3436 nz = procsnz[0]; 3437 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3438 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3439 3440 /* read in everyone else's rows and ship them off */ 3441 for (i=1; i<size; i++) { 3442 nz = procsnz[i]; 3443 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3444 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3445 } 3446 ierr = PetscFree(cols);CHKERRQ(ierr); 3447 } else { 3448 /* determine buffer space needed for message */ 3449 nz = 0; 3450 for (i=0; i<m; i++) { 3451 nz += ourlens[i]; 3452 } 3453 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3454 3455 /* receive message of column indices */ 3456 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3457 } 3458 3459 /* determine column ownership if matrix is not square */ 3460 if (N != M) { 3461 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3462 else n = newMat->cmap->n; 3463 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3464 cstart = cend - n; 3465 } else { 3466 cstart = rstart; 3467 cend = rend; 3468 n = cend - cstart; 3469 } 3470 3471 /* loop over local rows, determining number of off-diagonal entries */ 3472 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3473 jj = 0; 3474 for (i=0; i<m; i++) { 3475 for (j=0; j<ourlens[i]; j++) { 3476 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3477 jj++; 3478 } 3479 } 3480 3481 for (i=0; i<m; i++) { 3482 ourlens[i] -= offlens[i]; 3483 } 3484 if (!sizesset) { 3485 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3486 } 3487 3488 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3489 3490 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3491 3492 for (i=0; i<m; i++) { 3493 ourlens[i] += offlens[i]; 3494 } 3495 3496 if (!rank) { 3497 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3498 3499 /* read in my part of the matrix numerical values */ 3500 nz = procsnz[0]; 3501 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3502 3503 /* insert into matrix */ 3504 jj = rstart; 3505 smycols = mycols; 3506 svals = vals; 3507 for (i=0; i<m; i++) { 3508 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3509 smycols += ourlens[i]; 3510 svals += ourlens[i]; 3511 jj++; 3512 } 3513 3514 /* read in other processors and ship out */ 3515 for (i=1; i<size; i++) { 3516 nz =
procsnz[i]; 3517 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3518 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3519 } 3520 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3521 } else { 3522 /* receive numeric values */ 3523 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3524 3525 /* receive message of values */ 3526 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3527 3528 /* insert into matrix */ 3529 jj = rstart; 3530 smycols = mycols; 3531 svals = vals; 3532 for (i=0; i<m; i++) { 3533 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3534 smycols += ourlens[i]; 3535 svals += ourlens[i]; 3536 jj++; 3537 } 3538 } 3539 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3540 ierr = PetscFree(vals);CHKERRQ(ierr); 3541 ierr = PetscFree(mycols);CHKERRQ(ierr); 3542 ierr = PetscFree(rowners);CHKERRQ(ierr); 3543 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3544 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3545 PetscFunctionReturn(0); 3546 } 3547 3548 #undef __FUNCT__ 3549 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3550 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3551 { 3552 PetscErrorCode ierr; 3553 IS iscol_local; 3554 PetscInt csize; 3555 3556 PetscFunctionBegin; 3557 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3558 if (call == MAT_REUSE_MATRIX) { 3559 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3560 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3561 } else { 3562 PetscInt cbs; 3563 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3564 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3565 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3566 } 3567 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3568 if (call == MAT_INITIAL_MATRIX) { 3569 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3570 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3571 } 3572 PetscFunctionReturn(0); 3573 } 3574 3575 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3576 #undef __FUNCT__ 3577 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3578 /* 3579 Not great since it makes two copies of the submatrix: first a SeqAIJ 3580 on each process, and then the end result by concatenating the local matrices. 3581 Writing it directly would be much like MatGetSubMatrices_MPIAIJ(). 3582 3583 Note: This requires a sequential iscol containing all the column indices.
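    Callers build that iscol as MatGetSubMatrix_MPIAIJ() does above, roughly

        ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);

    so that every process passes the complete list of selected global columns
    (sketch only; see the caller for the MAT_REUSE_MATRIX bookkeeping).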
3584 */ 3585 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3586 { 3587 PetscErrorCode ierr; 3588 PetscMPIInt rank,size; 3589 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3590 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3591 PetscBool allcolumns, colflag; 3592 Mat M,Mreuse; 3593 MatScalar *vwork,*aa; 3594 MPI_Comm comm; 3595 Mat_SeqAIJ *aij; 3596 3597 PetscFunctionBegin; 3598 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3599 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3601 3602 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3603 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3604 if (colflag && ncol == mat->cmap->N) { 3605 allcolumns = PETSC_TRUE; 3606 } else { 3607 allcolumns = PETSC_FALSE; 3608 } 3609 if (call == MAT_REUSE_MATRIX) { 3610 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3611 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3612 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3613 } else { 3614 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3615 } 3616 3617 /* 3618 m - number of local rows 3619 n - number of columns (same on all processors) 3620 rstart - first row in new global matrix generated 3621 */ 3622 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3623 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3624 if (call == MAT_INITIAL_MATRIX) { 3625 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3626 ii = aij->i; 3627 jj = aij->j; 3628 3629 /* 3630 Determine the number of non-zeros in the diagonal and off-diagonal 3631 portions of the matrix in order to do correct preallocation 3632 */ 3633 3634 /* first get start and end of "diagonal" columns */ 3635 if (csize == PETSC_DECIDE) { 3636 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3637 if (mglobal == n) { /* square matrix */ 3638 nlocal = m; 3639 } else { 3640 nlocal = n/size + ((n % size) > rank); 3641 } 3642 } else { 3643 nlocal = csize; 3644 } 3645 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3646 rstart = rend - nlocal; 3647 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3648 3649 /* next, compute all the lengths */ 3650 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3651 olens = dlens + m; 3652 for (i=0; i<m; i++) { 3653 jend = ii[i+1] - ii[i]; 3654 olen = 0; 3655 dlen = 0; 3656 for (j=0; j<jend; j++) { 3657 if (*jj < rstart || *jj >= rend) olen++; 3658 else dlen++; 3659 jj++; 3660 } 3661 olens[i] = olen; 3662 dlens[i] = dlen; 3663 } 3664 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3665 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3666 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3667 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3668 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3669 ierr = PetscFree(dlens);CHKERRQ(ierr); 3670 } else { 3671 PetscInt ml,nl; 3672 3673 M = *newmat; 3674 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3675 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3676 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3677 /* 3678 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3679 rather than the slower MatSetValues(). 3680 */ 3681 M->was_assembled = PETSC_TRUE; 3682 M->assembled = PETSC_FALSE; 3683 } 3684 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3685 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3686 ii = aij->i; 3687 jj = aij->j; 3688 aa = aij->a; 3689 for (i=0; i<m; i++) { 3690 row = rstart + i; 3691 nz = ii[i+1] - ii[i]; 3692 cwork = jj; jj += nz; 3693 vwork = aa; aa += nz; 3694 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3695 } 3696 3697 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3698 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3699 *newmat = M; 3700 3701 /* save submatrix used in processor for next request */ 3702 if (call == MAT_INITIAL_MATRIX) { 3703 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3704 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3705 } 3706 PetscFunctionReturn(0); 3707 } 3708 3709 #undef __FUNCT__ 3710 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3711 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3712 { 3713 PetscInt m,cstart, cend,j,nnz,i,d; 3714 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3715 const PetscInt *JJ; 3716 PetscScalar *values; 3717 PetscErrorCode ierr; 3718 3719 PetscFunctionBegin; 3720 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3721 3722 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3723 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3724 m = B->rmap->n; 3725 cstart = B->cmap->rstart; 3726 cend = B->cmap->rend; 3727 rstart = B->rmap->rstart; 3728 3729 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3730 3731 #if defined(PETSC_USE_DEBUG) 3732 for (i=0; i<m; i++) { 3733 nnz = Ii[i+1]- Ii[i]; 3734 JJ = J + Ii[i]; 3735 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3736 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3737 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3738 } 3739 #endif 3740 3741 for (i=0; i<m; i++) { 3742 nnz = Ii[i+1]- Ii[i]; 3743 JJ = J + Ii[i]; 3744 nnz_max = PetscMax(nnz_max,nnz); 3745 d = 0; 3746 for (j=0; j<nnz; j++) { 3747 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3748 } 3749 d_nnz[i] = d; 3750 o_nnz[i] = nnz - d; 3751 } 3752 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3753 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3754 3755 if (v) values = (PetscScalar*)v; 3756 else { 3757 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3758 } 3759 3760 for (i=0; i<m; i++) { 3761 ii = i + rstart; 3762 nnz = Ii[i+1]- Ii[i]; 3763 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3764 } 3765 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3766 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3767 3768 if (!v) { 3769 ierr = PetscFree(values);CHKERRQ(ierr); 3770 } 3771 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3772 PetscFunctionReturn(0); 3773 } 3774 3775 #undef __FUNCT__ 3776 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3777 /*@ 3778 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3779 (the default parallel PETSc format). 3780 3781 Collective on MPI_Comm 3782 3783 Input Parameters: 3784 + B - the matrix 3785 . i - the indices into j for the start of each local row (starts with zero) 3786 . j - the column indices for each local row (starts with zero) 3787 - v - optional values in the matrix 3788 3789 Level: developer 3790 3791 Notes: 3792 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3793 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3794 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3795 3796 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3797 3798 The format used for the sparse matrix input is equivalent to a 3799 row-major ordering, i.e. for the following matrix, the input data expected is 3800 as shown: 3801 3802 1 0 0 3803 2 0 3 P0 3804 ------- 3805 4 5 6 P1 3806 3807 Process0 [P0]: rows_owned=[0,1] 3808 i = {0,1,3} [size = nrow+1 = 2+1] 3809 j = {0,0,2} [size = nz = 3] 3810 v = {1,2,3} [size = nz = 3] 3811 3812 Process1 [P1]: rows_owned=[2] 3813 i = {0,3} [size = nrow+1 = 1+1] 3814 j = {0,1,2} [size = nz = 3] 3815 v = {4,5,6} [size = nz = 3] 3816 3817 .keywords: matrix, aij, compressed row, sparse, parallel 3818 3819 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3820 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3821 @*/ 3822 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3823 { 3824 PetscErrorCode ierr; 3825 3826 PetscFunctionBegin; 3827 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3828 PetscFunctionReturn(0); 3829 } 3830 3831 #undef __FUNCT__ 3832 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3833 /*@C 3834 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3835 (the default parallel PETSc format). For good matrix assembly performance 3836 the user should preallocate the matrix storage by setting the parameters 3837 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3838 performance can be increased by more than a factor of 50. 3839 3840 Collective on MPI_Comm 3841 3842 Input Parameters: 3843 + A - the matrix 3844 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3845 (same value is used for all local rows) 3846 . d_nnz - array containing the number of nonzeros in the various rows of the 3847 DIAGONAL portion of the local submatrix (possibly different for each row) 3848 or NULL, if d_nz is used to specify the nonzero structure. 3849 The size of this array is equal to the number of local rows, i.e. 'm'.
3850 For matrices that will be factored, you must leave room for (and set) 3851 the diagonal entry even if it is zero. 3852 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3853 submatrix (same value is used for all local rows). 3854 - o_nnz - array containing the number of nonzeros in the various rows of the 3855 OFF-DIAGONAL portion of the local submatrix (possibly different for 3856 each row) or NULL, if o_nz is used to specify the nonzero 3857 structure. The size of this array is equal to the number 3858 of local rows, i.e. 'm'. 3859 3860 If the *_nnz parameter is given then the *_nz parameter is ignored 3861 3862 The AIJ format (also called the Yale sparse matrix format or 3863 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3864 storage. The stored row and column indices begin with zero. 3865 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3866 3867 The parallel matrix is partitioned such that the first m0 rows belong to 3868 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3869 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3870 3871 The DIAGONAL portion of the local submatrix of a processor can be defined 3872 as the submatrix which is obtained by extracting the part corresponding to 3873 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3874 first row that belongs to the processor, r2 is the last row belonging to 3875 this processor, and c1-c2 is the range of indices of the local part of a 3876 vector suitable for applying the matrix to. This is an mxn matrix. In the 3877 common case of a square matrix, the row and column ranges are the same and 3878 the DIAGONAL part is also square. The remaining portion of the local 3879 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 3880 3881 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3882 3883 You can call MatGetInfo() to get information on how effective the preallocation was; 3884 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3885 You can also run with the option -info and look for messages with the string 3886 malloc in them to see if additional memory allocation was needed. 3887 3888 Example usage: 3889 3890 Consider the following 8x8 matrix with 34 non-zero values, that is 3891 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3892 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3893 as follows: 3894 3895 .vb 3896 1 2 0 | 0 3 0 | 0 4 3897 Proc0 0 5 6 | 7 0 0 | 8 0 3898 9 0 10 | 11 0 0 | 12 0 3899 ------------------------------------- 3900 13 0 14 | 15 16 17 | 0 0 3901 Proc1 0 18 0 | 19 20 21 | 0 0 3902 0 0 0 | 22 23 0 | 24 0 3903 ------------------------------------- 3904 Proc2 25 26 27 | 0 0 28 | 29 0 3905 30 0 0 | 31 32 33 | 0 34 3906 .ve 3907 3908 This can be represented as a collection of submatrices as: 3909 3910 .vb 3911 A B C 3912 D E F 3913 G H I 3914 .ve 3915 3916 Where the submatrices A,B,C are owned by proc0, D,E,F are 3917 owned by proc1, G,H,I are owned by proc2. 3918 3919 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3920 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3921 The 'M','N' parameters are 8,8, and have the same values on all procs. 3922 3923 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3924 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices 3925 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3926 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3927 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3928 matrix, and [DF] as another SeqAIJ matrix. 3929 3930 When d_nz, o_nz parameters are specified, d_nz storage elements are 3931 allocated for every row of the local diagonal submatrix, and o_nz 3932 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3933 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3934 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3935 In this case, the values of d_nz,o_nz are: 3936 .vb 3937 proc0 : d_nz = 2, o_nz = 2 3938 proc1 : d_nz = 3, o_nz = 2 3939 proc2 : d_nz = 1, o_nz = 4 3940 .ve 3941 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3942 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3943 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3944 34 values. 3945 3946 When d_nnz, o_nnz parameters are specified, the storage is specified 3947 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3948 In the above case the values for d_nnz,o_nnz are: 3949 .vb 3950 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3951 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3952 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3953 .ve 3954 Here the space allocated is the sum of all the above values, i.e. 34, and 3955 hence the preallocation is perfect. 3956 3957 Level: intermediate 3958 3959 .keywords: matrix, aij, compressed row, sparse, parallel 3960 3961 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3962 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3963 @*/ 3964 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3965 { 3966 PetscErrorCode ierr; 3967 3968 PetscFunctionBegin; 3969 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3970 PetscValidType(B,1); 3971 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3972 PetscFunctionReturn(0); 3973 } 3974 3975 #undef __FUNCT__ 3976 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3977 /*@ 3978 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3979 CSR format. 3980 3981 Collective on MPI_Comm 3982 3983 Input Parameters: 3984 + comm - MPI communicator 3985 . m - number of local rows (Cannot be PETSC_DECIDE) 3986 . n - This value should be the same as the local size used in creating the 3987 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3988 calculated if N is given) For square matrices n is almost always m. 3989 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3990 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3991 . i - row indices 3992 . j - column indices 3993 - a - matrix values 3994 3995 Output Parameter: 3996 . mat - the matrix 3997 3998 Level: intermediate 3999 4000 Notes: 4001 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4002 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4003 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
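       As a sketch only (error handling and the assembly of the i, j, a arrays
       omitted), a typical call for the layout shown below is
.vb
       Mat A;
       ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve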
4004 4005 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4006 4007 The format used for the sparse matrix input is equivalent to a 4008 row-major ordering, i.e. for the following matrix, the input data expected is 4009 as shown: 4010 4011 1 0 0 4012 2 0 3 P0 4013 ------- 4014 4 5 6 P1 4015 4016 Process0 [P0]: rows_owned=[0,1] 4017 i = {0,1,3} [size = nrow+1 = 2+1] 4018 j = {0,0,2} [size = nz = 3] 4019 v = {1,2,3} [size = nz = 3] 4020 4021 Process1 [P1]: rows_owned=[2] 4022 i = {0,3} [size = nrow+1 = 1+1] 4023 j = {0,1,2} [size = nz = 3] 4024 v = {4,5,6} [size = nz = 3] 4025 4026 .keywords: matrix, aij, compressed row, sparse, parallel 4027 4028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4029 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4030 @*/ 4031 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4032 { 4033 PetscErrorCode ierr; 4034 4035 PetscFunctionBegin; 4036 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4037 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4038 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4039 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4040 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4041 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4042 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4043 PetscFunctionReturn(0); 4044 } 4045 4046 #undef __FUNCT__ 4047 #define __FUNCT__ "MatCreateAIJ" 4048 /*@C 4049 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4050 (the default parallel PETSc format). For good matrix assembly performance 4051 the user should preallocate the matrix storage by setting the parameters 4052 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4053 performance can be increased by more than a factor of 50. 4054 4055 Collective on MPI_Comm 4056 4057 Input Parameters: 4058 + comm - MPI communicator 4059 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4060 This value should be the same as the local size used in creating the 4061 y vector for the matrix-vector product y = Ax. 4062 . n - This value should be the same as the local size used in creating the 4063 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4064 calculated if N is given) For square matrices n is almost always m. 4065 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4066 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4067 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4068 (same value is used for all local rows) 4069 . d_nnz - array containing the number of nonzeros in the various rows of the 4070 DIAGONAL portion of the local submatrix (possibly different for each row) 4071 or NULL, if d_nz is used to specify the nonzero structure. 4072 The size of this array is equal to the number of local rows, i.e. 'm'. 4073 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4074 submatrix (same value is used for all local rows).
4075 - o_nnz - array containing the number of nonzeros in the various rows of the 4076 OFF-DIAGONAL portion of the local submatrix (possibly different for 4077 each row) or NULL, if o_nz is used to specify the nonzero 4078 structure. The size of this array is equal to the number 4079 of local rows, i.e. 'm'. 4080 4081 Output Parameter: 4082 . A - the matrix 4083 4084 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4085 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4086 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4087 4088 Notes: 4089 If the *_nnz parameter is given then the *_nz parameter is ignored 4090 4091 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4092 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4093 storage requirements for this matrix. 4094 4095 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4096 processor then it must be used on all processors that share the object for 4097 that argument. 4098 4099 The user MUST specify either the local or global matrix dimensions 4100 (possibly both). 4101 4102 The parallel matrix is partitioned across processors such that the 4103 first m0 rows belong to process 0, the next m1 rows belong to 4104 process 1, the next m2 rows belong to process 2, etc., where 4105 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 4106 values corresponding to an [m x N] submatrix. 4107 4108 The columns are logically partitioned with the n0 columns belonging 4109 to the 0th partition, the next n1 columns belonging to the next 4110 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4111 4112 The DIAGONAL portion of the local submatrix on any given processor 4113 is the submatrix corresponding to the rows and columns m,n 4114 of the given processor, i.e. the diagonal matrix on 4115 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4116 etc. The remaining portion of the local submatrix [m x (N-n)] 4117 constitutes the OFF-DIAGONAL portion. The example below better 4118 illustrates this concept. 4119 4120 For a square global matrix we define each processor's diagonal portion 4121 to be its local rows and the corresponding columns (a square submatrix); 4122 each processor's off-diagonal portion encompasses the remainder of the 4123 local matrix (a rectangular submatrix). 4124 4125 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4126 4127 When calling this routine with a single process communicator, a matrix of 4128 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4129 type of communicator, use the construction mechanism: 4130 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4131 4132 By default, this format uses inodes (identical nodes) when possible. 4133 We search for consecutive rows with the same nonzero structure, thereby 4134 reusing matrix information to achieve increased efficiency. 4135 4136 Options Database Keys: 4137 + -mat_no_inode - Do not use inodes 4138 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4139 - -mat_aij_oneindex - Internally use indexing starting at 1 4140 rather than 0. Note that when calling MatSetValues(), 4141 the user still MUST index entries starting at 0! 4142 4143 4144 Example usage: 4145 4146 Consider the following 8x8 matrix with 34 non-zero values, that is 4147 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 4148 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4149 as follows: 4150 4151 .vb 4152 1 2 0 | 0 3 0 | 0 4 4153 Proc0 0 5 6 | 7 0 0 | 8 0 4154 9 0 10 | 11 0 0 | 12 0 4155 ------------------------------------- 4156 13 0 14 | 15 16 17 | 0 0 4157 Proc1 0 18 0 | 19 20 21 | 0 0 4158 0 0 0 | 22 23 0 | 24 0 4159 ------------------------------------- 4160 Proc2 25 26 27 | 0 0 28 | 29 0 4161 30 0 0 | 31 32 33 | 0 34 4162 .ve 4163 4164 This can be represented as a collection of submatrices as: 4165 4166 .vb 4167 A B C 4168 D E F 4169 G H I 4170 .ve 4171 4172 Where the submatrices A,B,C are owned by proc0, D,E,F are 4173 owned by proc1, G,H,I are owned by proc2. 4174 4175 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4176 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4177 The 'M','N' parameters are 8,8, and have the same values on all procs. 4178 4179 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4180 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4181 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4182 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4183 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4184 matrix, and [DF] as another SeqAIJ matrix. 4185 4186 When d_nz, o_nz parameters are specified, d_nz storage elements are 4187 allocated for every row of the local diagonal submatrix, and o_nz 4188 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4189 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4190 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4191 In this case, the values of d_nz,o_nz are: 4192 .vb 4193 proc0 : d_nz = 2, o_nz = 2 4194 proc1 : d_nz = 3, o_nz = 2 4195 proc2 : d_nz = 1, o_nz = 4 4196 .ve 4197 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4198 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4199 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4200 34 values. 4201 4202 When d_nnz, o_nnz parameters are specified, the storage is specified 4203 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4204 In the above case the values for d_nnz,o_nnz are: 4205 .vb 4206 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4207 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4208 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4209 .ve 4210 Here the space allocated is the sum of all the above values, i.e. 34, and 4211 hence the preallocation is perfect.
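   For the example above, a creation call on every process would look roughly
   like the following (sketch only, error checking omitted; d_nnz/o_nnz as
   listed per process):
.vb
      Mat A;
      MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                   0,d_nnz,0,o_nnz,&A);
.ve
   followed by the usual MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd()
   sequence.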
4212 4213 Level: intermediate 4214 4215 .keywords: matrix, aij, compressed row, sparse, parallel 4216 4217 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4218 MPIAIJ, MatCreateMPIAIJWithArrays() 4219 @*/ 4220 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4221 { 4222 PetscErrorCode ierr; 4223 PetscMPIInt size; 4224 4225 PetscFunctionBegin; 4226 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4227 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4228 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4229 if (size > 1) { 4230 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4231 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4232 } else { 4233 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4234 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4235 } 4236 PetscFunctionReturn(0); 4237 } 4238 4239 #undef __FUNCT__ 4240 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4241 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4242 { 4243 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4244 4245 PetscFunctionBegin; 4246 *Ad = a->A; 4247 *Ao = a->B; 4248 *colmap = a->garray; 4249 PetscFunctionReturn(0); 4250 } 4251 4252 #undef __FUNCT__ 4253 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4254 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4255 { 4256 PetscErrorCode ierr; 4257 PetscInt i; 4258 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4259 4260 PetscFunctionBegin; 4261 if (coloring->ctype == IS_COLORING_GLOBAL) { 4262 ISColoringValue *allcolors,*colors; 4263 ISColoring ocoloring; 4264 4265 /* set coloring for diagonal portion */ 4266 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4267 4268 /* set coloring for off-diagonal portion */ 4269 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4270 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4271 for (i=0; i<a->B->cmap->n; i++) { 4272 colors[i] = allcolors[a->garray[i]]; 4273 } 4274 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4275 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4276 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4277 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4278 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4279 ISColoringValue *colors; 4280 PetscInt *larray; 4281 ISColoring ocoloring; 4282 4283 /* set coloring for diagonal portion */ 4284 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4285 for (i=0; i<a->A->cmap->n; i++) { 4286 larray[i] = i + A->cmap->rstart; 4287 } 4288 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4289 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4290 for (i=0; i<a->A->cmap->n; i++) { 4291 colors[i] = coloring->colors[larray[i]]; 4292 } 4293 ierr = PetscFree(larray);CHKERRQ(ierr); 4294 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4295 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4296 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4297 4298 /* set coloring for off-diagonal portion */ 4299 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4300 ierr = 
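/* a->garray[] lists, for each column of the off-diagonal block B, its global
   column number; pushing garray through the inverse of the column mapping
   yields the ghosted local index whose color is copied below */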
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4301 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4302 for (i=0; i<a->B->cmap->n; i++) { 4303 colors[i] = coloring->colors[larray[i]]; 4304 } 4305 ierr = PetscFree(larray);CHKERRQ(ierr); 4306 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4307 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4308 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4309 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4310 PetscFunctionReturn(0); 4311 } 4312 4313 #undef __FUNCT__ 4314 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4315 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4316 { 4317 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4318 PetscErrorCode ierr; 4319 4320 PetscFunctionBegin; 4321 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4322 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4323 PetscFunctionReturn(0); 4324 } 4325 4326 #undef __FUNCT__ 4327 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4328 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4329 { 4330 PetscErrorCode ierr; 4331 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4332 PetscInt *indx; 4333 4334 PetscFunctionBegin; 4335 /* This routine will ONLY return MPIAIJ type matrix */ 4336 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4337 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4338 if (n == PETSC_DECIDE) { 4339 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4340 } 4341 /* Check sum(n) = N */ 4342 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4343 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4344 4345 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4346 rstart -= m; 4347 4348 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4349 for (i=0; i<m; i++) { 4350 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4351 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4352 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4353 } 4354 4355 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4356 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4357 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4358 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4359 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4360 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4361 PetscFunctionReturn(0); 4362 } 4363 4364 #undef __FUNCT__ 4365 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4366 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4367 { 4368 PetscErrorCode ierr; 4369 PetscInt m,N,i,rstart,nnz,Ii; 4370 PetscInt *indx; 4371 PetscScalar *values; 4372 4373 PetscFunctionBegin; 4374 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4375 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4376 for (i=0; i<m; i++) { 4377 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4378 Ii = i + rstart; 4379 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4380 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4381 } 4382 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4383 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4384 PetscFunctionReturn(0); 4385 } 4386 4387 #undef __FUNCT__ 4388 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4389 /*@ 4390 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4391 matrices from each processor 4392 4393 Collective on MPI_Comm 4394 4395 Input Parameters: 4396 + comm - the communicator the parallel matrix will live on 4397 . inmat - the input sequential matrix (one per process) 4398 . n - number of local columns (or PETSC_DECIDE) 4399 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4400 4401 Output Parameter: 4402 . outmat - the parallel matrix generated 4403 4404 Level: advanced 4405 4406 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4407 4408 @*/ 4409 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4410 { 4411 PetscErrorCode ierr; 4412 PetscMPIInt size; 4413 4414 PetscFunctionBegin; 4415 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4416 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4417 if (size == 1) { 4418 if (scall == MAT_INITIAL_MATRIX) { 4419 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4420 } else { 4421 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4422 } 4423 } else { 4424 if (scall == MAT_INITIAL_MATRIX) { 4425 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4426 } 4427 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4428 } 4429 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4430 PetscFunctionReturn(0); 4431 } 4432 4433 #undef __FUNCT__ 4434 #define __FUNCT__ "MatFileSplit" 4435 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4436 { 4437 PetscErrorCode ierr; 4438 PetscMPIInt rank; 4439 PetscInt m,N,i,rstart,nnz; 4440 size_t len; 4441 const PetscInt *indx; 4442 PetscViewer out; 4443 char *name; 4444 Mat B; 4445 const PetscScalar *values; 4446 4447 PetscFunctionBegin; 4448 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4449 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4450 /* Should this be the type of the diagonal block of A?
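      (An open question in the source: hardwiring MATSEQAIJ below discards any
      specialized subtype, e.g. AIJPERM or AIJCRL, that the diagonal block of A
      might carry.)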
#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1; /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr =
PetscFree(ba_i);CHKERRQ(ierr); 4627 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4628 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4629 PetscFunctionReturn(0); 4630 } 4631 4632 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4633 4634 #undef __FUNCT__ 4635 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4636 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4637 { 4638 PetscErrorCode ierr; 4639 Mat B_mpi; 4640 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4641 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4642 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4643 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4644 PetscInt len,proc,*dnz,*onz,bs,cbs; 4645 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4646 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4647 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4648 MPI_Status *status; 4649 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4650 PetscBT lnkbt; 4651 Mat_Merge_SeqsToMPI *merge; 4652 PetscContainer container; 4653 4654 PetscFunctionBegin; 4655 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4656 4657 /* make sure it is a PETSc comm */ 4658 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4659 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4660 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4661 4662 ierr = PetscNew(&merge);CHKERRQ(ierr); 4663 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4664 4665 /* determine row ownership */ 4666 /*---------------------------------------------------------*/ 4667 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4668 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4669 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4670 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4671 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4672 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4673 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4674 4675 m = merge->rowmap->n; 4676 owners = merge->rowmap->range; 4677 4678 /* determine the number of messages to send, their lengths */ 4679 /*---------------------------------------------------------*/ 4680 len_s = merge->len_s; 4681 4682 len = 0; /* length of buf_si[] */ 4683 merge->nsend = 0; 4684 for (proc=0; proc<size; proc++) { 4685 len_si[proc] = 0; 4686 if (proc == rank) { 4687 len_s[proc] = 0; 4688 } else { 4689 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4690 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4691 } 4692 if (len_s[proc]) { 4693 merge->nsend++; 4694 nrows = 0; 4695 for (i=owners[proc]; i<owners[proc+1]; i++) { 4696 if (ai[i+1] > ai[i]) nrows++; 4697 } 4698 len_si[proc] = 2*(nrows+1); 4699 len += len_si[proc]; 4700 } 4701 } 4702 4703 /* determine the number and length of messages to receive for ij-structure */ 4704 /*-------------------------------------------------------------------------*/ 4705 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4706 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4707 4708 /* post the Irecv of j-structure */ 4709 /*-------------------------------*/ 4710 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4711 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4712 4713 /* post the Isend of j-structure */ 4714 /*--------------------------------*/ 4715 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4716 4717 for (proc=0, k=0; proc<size; proc++) { 4718 if (!len_s[proc]) continue; 4719 i = owners[proc]; 4720 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4721 k++; 4722 } 4723 4724 /* receives and sends of j-structure are complete */ 4725 /*------------------------------------------------*/ 4726 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4727 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4728 4729 /* send and recv i-structure */ 4730 /*---------------------------*/ 4731 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4732 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4733 4734 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4735 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4736 for (proc=0,k=0; proc<size; proc++) { 4737 if (!len_s[proc]) continue; 4738 /* form outgoing message for i-structure: 4739 buf_si[0]: nrows to be sent 4740 [1:nrows]: row index (global) 4741 [nrows+1:2*nrows+1]: i-structure index 4742 */ 4743 /*-------------------------------------------*/ 4744 nrows = len_si[proc]/2 - 1; 4745 buf_si_i = buf_si + nrows+1; 4746 buf_si[0] = nrows; 4747 buf_si_i[0] = 0; 4748 nrows = 0; 4749 for (i=owners[proc]; i<owners[proc+1]; i++) { 4750 anzi = ai[i+1] - ai[i]; 4751 if (anzi) { 4752 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4753 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4754 nrows++; 4755 } 4756 } 4757 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4758 k++; 4759 buf_si += len_si[proc]; 4760 } 4761 4762 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4763 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4764 4765 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4766 for (i=0; i<merge->nrecv; i++) { 4767 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4768 } 4769 4770 ierr = PetscFree(len_si);CHKERRQ(ierr); 4771 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4772 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4773 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4774 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4775 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4776 ierr = PetscFree(status);CHKERRQ(ierr); 4777 4778 /* compute a local seq matrix in each processor */ 4779 /*----------------------------------------------*/ 4780 /* allocate bi array and free space for accumulating nonzero column info */ 4781 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4782 bi[0] = 0; 4783 4784 /* create and initialize a linked list */ 4785 nlnk = N+1; 4786 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4787 4788 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4789 len = ai[owners[rank+1]] - ai[owners[rank]]; 4790 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4791 4792 current_space = free_space; 4793 4794 /* determine symbolic info for each local row */ 4795 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
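/*
   The struct attached above can be retrieved later (as the numeric phase does) with the
   standard container pattern; a minimal sketch, assuming B_mpi was produced by the
   symbolic routine above:

     PetscContainer      container;
     Mat_Merge_SeqsToMPI *merge;
     PetscObjectQuery((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject*)&container);
     if (container) {
       PetscContainerGetPointer(container,(void**)&merge);
       // merge->bi, merge->bj, and the send/receive buffers are now available
     }
*/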
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each process
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix on each process MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
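/*
   A minimal usage sketch (names are illustrative): each rank assembles a sequential matrix
   Aseq of the full global size holding its contribution, and the contributions are summed
   into one parallel matrix; a second call with MAT_REUSE_MATRIX re-sums new values into the
   same nonzero pattern:

     Mat Aseq,C;
     // ... assemble Aseq as a MATSEQAIJ of global size M x N on every rank ...
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     // ... change the numerical values of Aseq, keeping its nonzero pattern ...
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
*/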
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them
         into a sequential matrix with mlocal rows and n columns, where mlocal is the row count obtained with
         MatGetLocalSize() and n is the global column count obtained with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, global columns to the left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, global columns to the right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
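/*
   A minimal usage sketch, assuming A is an assembled MATMPIAIJ (the names are illustrative):

     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);   // mlocal x N sequential copy
     // ... use A_loc; after A's values change (same pattern), refresh it with ...
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
*/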
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5125 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5126 5127 Output Parameter: 5128 + rowb, colb - index sets of rows and columns of B to extract 5129 - B_seq - the sequential matrix generated 5130 5131 Level: developer 5132 5133 @*/ 5134 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5135 { 5136 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5137 PetscErrorCode ierr; 5138 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5139 IS isrowb,iscolb; 5140 Mat *bseq=NULL; 5141 5142 PetscFunctionBegin; 5143 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5144 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5145 } 5146 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5147 5148 if (scall == MAT_INITIAL_MATRIX) { 5149 start = A->cmap->rstart; 5150 cmap = a->garray; 5151 nzA = a->A->cmap->n; 5152 nzB = a->B->cmap->n; 5153 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5154 ncols = 0; 5155 for (i=0; i<nzB; i++) { /* row < local row index */ 5156 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5157 else break; 5158 } 5159 imark = i; 5160 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5161 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5162 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5163 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5164 } else { 5165 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5166 isrowb = *rowb; iscolb = *colb; 5167 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5168 bseq[0] = *B_seq; 5169 } 5170 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5171 *B_seq = bseq[0]; 5172 ierr = PetscFree(bseq);CHKERRQ(ierr); 5173 if (!rowb) { 5174 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5175 } else { 5176 *rowb = isrowb; 5177 } 5178 if (!colb) { 5179 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5180 } else { 5181 *colb = iscolb; 5182 } 5183 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5184 PetscFunctionReturn(0); 5185 } 5186 5187 #undef __FUNCT__ 5188 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5189 /* 5190 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5191 of the OFF-DIAGONAL portion of local A 5192 5193 Collective on Mat 5194 5195 Input Parameters: 5196 + A,B - the matrices in mpiaij format 5197 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5198 5199 Output Parameter: 5200 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5201 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5202 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5203 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5204 5205 Level: developer 5206 5207 */ 5208 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5209 { 5210 VecScatter_MPI_General *gen_to,*gen_from; 5211 PetscErrorCode ierr; 5212 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5213 Mat_SeqAIJ *b_oth; 5214 VecScatter ctx =a->Mvctx; 5215 MPI_Comm comm; 5216 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5217 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5218 PetscScalar *rvalues,*svalues; 5219 MatScalar *b_otha,*bufa,*bufA; 5220 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5221 MPI_Request *rwaits = NULL,*swaits = NULL; 5222 MPI_Status *sstatus,rstatus; 5223 PetscMPIInt jj; 5224 PetscInt *cols,sbs,rbs; 5225 PetscScalar *vals; 5226 5227 PetscFunctionBegin; 5228 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5229 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5230 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5231 } 5232 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5233 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5234 5235 gen_to = (VecScatter_MPI_General*)ctx->todata; 5236 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5237 rvalues = gen_from->values; /* holds the length of receiving row */ 5238 svalues = gen_to->values; /* holds the length of sending row */ 5239 nrecvs = gen_from->n; 5240 nsends = gen_to->n; 5241 5242 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5243 srow = gen_to->indices; /* local row index to be sent */ 5244 sstarts = gen_to->starts; 5245 sprocs = gen_to->procs; 5246 sstatus = gen_to->sstatus; 5247 sbs = gen_to->bs; 5248 rstarts = gen_from->starts; 5249 rprocs = gen_from->procs; 5250 rbs = gen_from->bs; 5251 5252 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5253 if (scall == MAT_INITIAL_MATRIX) { 5254 /* i-array */ 5255 /*---------*/ 5256 /* post receives */ 5257 for (i=0; i<nrecvs; i++) { 5258 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5259 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5260 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5261 } 5262 5263 /* pack the outgoing message */ 5264 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5265 5266 sstartsj[0] = 0; 5267 rstartsj[0] = 0; 5268 len = 0; /* total length of j or a array to be sent */ 5269 k = 0; 5270 for (i=0; i<nsends; i++) { 5271 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5272 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5273 for (j=0; j<nrows; j++) { 5274 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5275 for (l=0; l<sbs; l++) { 5276 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5277 5278 rowlen[j*sbs+l] = ncols; 5279 5280 len += ncols; 5281 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5282 } 5283 k++; 5284 } 5285 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5286 5287 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5288 } 5289 /* recvs and 
sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len        += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr =
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5384 } 5385 /* recvs and sends of a-array are completed */ 5386 i = nrecvs; 5387 while (i--) { 5388 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5389 } 5390 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5391 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5392 5393 if (scall == MAT_INITIAL_MATRIX) { 5394 /* put together the new matrix */ 5395 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5396 5397 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5398 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5399 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5400 b_oth->free_a = PETSC_TRUE; 5401 b_oth->free_ij = PETSC_TRUE; 5402 b_oth->nonew = 0; 5403 5404 ierr = PetscFree(bufj);CHKERRQ(ierr); 5405 if (!startsj_s || !bufa_ptr) { 5406 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5407 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5408 } else { 5409 *startsj_s = sstartsj; 5410 *startsj_r = rstartsj; 5411 *bufa_ptr = bufa; 5412 } 5413 } 5414 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5415 PetscFunctionReturn(0); 5416 } 5417 5418 #undef __FUNCT__ 5419 #define __FUNCT__ "MatGetCommunicationStructs" 5420 /*@C 5421 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5422 5423 Not Collective 5424 5425 Input Parameters: 5426 . A - The matrix in mpiaij format 5427 5428 Output Parameter: 5429 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5430 . colmap - A map from global column index to local index into lvec 5431 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5432 5433 Level: developer 5434 5435 @*/ 5436 #if defined(PETSC_USE_CTABLE) 5437 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5438 #else 5439 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5440 #endif 5441 { 5442 Mat_MPIAIJ *a; 5443 5444 PetscFunctionBegin; 5445 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5446 PetscValidPointer(lvec, 2); 5447 PetscValidPointer(colmap, 3); 5448 PetscValidPointer(multScatter, 4); 5449 a = (Mat_MPIAIJ*) A->data; 5450 if (lvec) *lvec = a->lvec; 5451 if (colmap) *colmap = a->colmap; 5452 if (multScatter) *multScatter = a->Mvctx; 5453 PetscFunctionReturn(0); 5454 } 5455 5456 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5457 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5458 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5459 5460 #undef __FUNCT__ 5461 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5462 /* 5463 Computes (B'*A')' since computing B*A directly is untenable 5464 5465 n p p 5466 ( ) ( ) ( ) 5467 m ( A ) * n ( B ) = m ( C ) 5468 ( ) ( ) ( ) 5469 5470 */ 5471 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5472 { 5473 PetscErrorCode ierr; 5474 Mat At,Bt,Ct; 5475 5476 PetscFunctionBegin; 5477 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5478 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5479 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5480 ierr = MatDestroy(&At);CHKERRQ(ierr); 
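  /* Ct holds Bt*At = (A*B)^T; the MatTranspose() with MAT_REUSE_MATRIX below writes the product back into the preallocated C */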
5481 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5482 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5483 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5484 PetscFunctionReturn(0); 5485 } 5486 5487 #undef __FUNCT__ 5488 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5489 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5490 { 5491 PetscErrorCode ierr; 5492 PetscInt m=A->rmap->n,n=B->cmap->n; 5493 Mat Cmat; 5494 5495 PetscFunctionBegin; 5496 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5497 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5498 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5499 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5500 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5501 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5502 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5503 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5504 5505 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5506 5507 *C = Cmat; 5508 PetscFunctionReturn(0); 5509 } 5510 5511 /* ----------------------------------------------------------------*/ 5512 #undef __FUNCT__ 5513 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5514 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5515 { 5516 PetscErrorCode ierr; 5517 5518 PetscFunctionBegin; 5519 if (scall == MAT_INITIAL_MATRIX) { 5520 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5521 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5522 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5523 } 5524 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5525 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5526 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5527 PetscFunctionReturn(0); 5528 } 5529 5530 #if defined(PETSC_HAVE_MUMPS) 5531 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5532 #endif 5533 #if defined(PETSC_HAVE_PASTIX) 5534 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5535 #endif 5536 #if defined(PETSC_HAVE_SUPERLU_DIST) 5537 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5538 #endif 5539 #if defined(PETSC_HAVE_CLIQUE) 5540 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5541 #endif 5542 5543 /*MC 5544 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5545 5546 Options Database Keys: 5547 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5548 5549 Level: beginner 5550 5551 .seealso: MatCreateAIJ() 5552 M*/ 5553 5554 #undef __FUNCT__ 5555 #define __FUNCT__ "MatCreate_MPIAIJ" 5556 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5557 { 5558 Mat_MPIAIJ *b; 5559 PetscErrorCode ierr; 5560 PetscMPIInt size; 5561 5562 PetscFunctionBegin; 5563 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5564 5565 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5566 B->data = (void*)b; 5567 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5568 B->assembled = PETSC_FALSE; 5569 B->insertmode = NOT_SET_VALUES; 5570 b->size = size; 5571 5572 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5573 5574 /* build cache for off array entries formed */ 5575 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5576 5577 b->donotstash = PETSC_FALSE; 5578 b->colmap = 0; 5579 b->garray = 0; 5580 b->roworiented = PETSC_TRUE; 5581 5582 /* stuff used for matrix vector multiply */ 5583 b->lvec = NULL; 5584 b->Mvctx = NULL; 5585 5586 /* stuff for MatGetRow() */ 5587 b->rowindices = 0; 5588 b->rowvalues = 0; 5589 b->getrowactive = PETSC_FALSE; 5590 5591 /* flexible pointer used in CUSP/CUSPARSE classes */ 5592 b->spptr = NULL; 5593 5594 #if defined(PETSC_HAVE_MUMPS) 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5596 #endif 5597 #if defined(PETSC_HAVE_PASTIX) 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5599 #endif 5600 #if defined(PETSC_HAVE_SUPERLU_DIST) 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5602 #endif 5603 #if defined(PETSC_HAVE_CLIQUE) 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5605 #endif 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5617 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.   i - row indices for "diagonal" portion of matrix
.   j - column indices
.   a - matrix values
.   oi - row indices for "off-diagonal" portion of matrix
.   oj - column indices
-   oa - matrix values

   Output Parameter:
.   mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.
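       A minimal sketch (run on exactly two MPI processes; the 2x2 tridiagonal values are illustrative
       only): each rank owns one row and one column; since the code below creates the off-diagonal block
       with the full global column width, oj is given here as global column indices.
.vb
       PetscInt    i[2]  = {0,1}, j[1]  = {0};
       PetscInt    oi[2] = {0,1}, oj[1];
       PetscScalar a[1]  = {2.0}, oa[1] = {-1.0};
       Mat         A;
       PetscMPIInt rank;

       MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
       oj[0] = rank ? 0 : 1;
       MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
.ve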
5667 5668 .keywords: matrix, aij, compressed row, sparse, parallel 5669 5670 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5671 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5672 @*/ 5673 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5674 { 5675 PetscErrorCode ierr; 5676 Mat_MPIAIJ *maij; 5677 5678 PetscFunctionBegin; 5679 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5680 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5681 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5682 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5683 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5684 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5685 maij = (Mat_MPIAIJ*) (*mat)->data; 5686 5687 (*mat)->preallocated = PETSC_TRUE; 5688 5689 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5690 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5691 5692 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5693 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5694 5695 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5696 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5697 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 5700 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5702 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5703 PetscFunctionReturn(0); 5704 } 5705 5706 /* 5707 Special version for direct calls from Fortran 5708 */ 5709 #include <petsc-private/fortranimpl.h> 5710 5711 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5712 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5713 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5714 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5715 #endif 5716 5717 /* Change these macros so can be used in void function */ 5718 #undef CHKERRQ 5719 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5720 #undef SETERRQ2 5721 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5722 #undef SETERRQ3 5723 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5724 #undef SETERRQ 5725 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5726 5727 #undef __FUNCT__ 5728 #define __FUNCT__ "matsetvaluesmpiaij_" 5729 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5730 { 5731 Mat mat = *mmat; 5732 PetscInt m = *mm, n = *mn; 5733 InsertMode addv = *maddv; 5734 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5735 PetscScalar value; 5736 PetscErrorCode ierr; 5737 5738 MatCheckPreallocated(mat,1); 5739 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5740 5741 #if defined(PETSC_USE_DEBUG) 5742 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5743 #endif 5744 { 5745 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5746 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5747 PetscBool roworiented = aij->roworiented; 5748 5749 /* Some Variables required in the macro */ 5750 Mat A = aij->A; 5751 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5752 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5753 MatScalar *aa = a->a; 5754 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5755 Mat B = aij->B; 5756 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5757 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5758 MatScalar *ba = b->a; 5759 5760 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5761 PetscInt nonew = a->nonew; 5762 MatScalar *ap1,*ap2; 5763 5764 PetscFunctionBegin; 5765 for (i=0; i<m; i++) { 5766 if (im[i] < 0) continue; 5767 #if defined(PETSC_USE_DEBUG) 5768 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5769 #endif 5770 if (im[i] >= rstart && im[i] < rend) { 5771 row = im[i] - rstart; 5772 lastcol1 = -1; 5773 rp1 = aj + ai[row]; 5774 ap1 = aa + ai[row]; 5775 rmax1 = aimax[row]; 5776 nrow1 = ailen[row]; 5777 low1 = 0; 5778 high1 = nrow1; 5779 lastcol2 = -1; 5780 rp2 = bj + bi[row]; 5781 ap2 = ba + bi[row]; 5782 rmax2 = bimax[row]; 5783 nrow2 = bilen[row]; 5784 low2 = 0; 5785 high2 = nrow2; 5786 5787 for (j=0; j<n; j++) { 5788 if (roworiented) value = v[i*n+j]; 5789 else value = v[i+j*m]; 5790 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5791 if (in[j] >= cstart && in[j] < cend) { 5792 col = in[j] - cstart; 5793 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5794 } else if (in[j] < 0) continue; 5795 #if defined(PETSC_USE_DEBUG) 5796 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5797 #endif 5798 else { 5799 if (mat->was_assembled) { 5800 if (!aij->colmap) { 5801 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5802 } 5803 #if defined(PETSC_USE_CTABLE) 5804 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5805 col--; 5806 #else 5807 col = aij->colmap[in[j]] - 1; 5808 #endif 5809 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5810 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5811 col = in[j]; 5812 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5813 B = aij->B; 5814 b = (Mat_SeqAIJ*)B->data; 5815 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5816 rp2 = bj + bi[row]; 5817 ap2 = ba + bi[row]; 5818 rmax2 = bimax[row]; 5819 nrow2 = bilen[row]; 5820 low2 = 0; 5821 high2 = nrow2; 5822 bm = aij->B->rmap->n; 5823 ba = b->a; 5824 } 5825 } else col = in[j]; 5826 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5827 } 5828 } 5829 } else if (!aij->donotstash) { 5830 if (roworiented) { 5831 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5832 } else { 5833 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5834 } 5835 } 5836 } 5837 } 5838 PetscFunctionReturnVoid(); 5839 } 5840 5841