1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
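   A typical call looks like the sketch below (gmat holds the entire matrix as a
   MATSEQAIJ whose entries are read only on rank 0, and m is the number of rows this
   rank is to own, e.g. chosen with PetscSplitOwnership()):

      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&mat);CHKERRQ(ierr);

   A later call with MAT_REUSE_MATRIX keeps the already-built parallel nonzero pattern
   and only moves the (possibly updated) numerical values over from rank 0.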
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if 
(gmataj[jj] < rstart) ld[i]++; 265 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 266 jj++; 267 } 268 } 269 /* receive numerical values */ 270 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 272 } 273 /* set preallocation */ 274 for (i=0; i<m; i++) { 275 dlens[i] -= olens[i]; 276 } 277 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 278 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 279 280 for (i=0; i<m; i++) { 281 dlens[i] += olens[i]; 282 } 283 cnt = 0; 284 for (i=0; i<m; i++) { 285 row = rstart + i; 286 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 287 cnt += dlens[i]; 288 } 289 if (rank) { 290 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 291 } 292 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 293 ierr = PetscFree(rowners);CHKERRQ(ierr); 294 295 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 296 297 *inmat = mat; 298 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 299 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 300 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 301 mat = *inmat; 302 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 303 if (!rank) { 304 /* send numerical values to other processes */ 305 gmata = (Mat_SeqAIJ*) gmat->data; 306 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 307 gmataa = gmata->a; 308 for (i=1; i<size; i++) { 309 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 310 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 311 } 312 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 313 } else { 314 /* receive numerical values from process 0*/ 315 nz = Ad->nz + Ao->nz; 316 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 317 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 318 } 319 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 320 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 321 ad = Ad->a; 322 ao = Ao->a; 323 if (mat->rmap->n) { 324 i = 0; 325 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 326 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 327 } 328 for (i=1; i<mat->rmap->n; i++) { 329 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 330 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 331 } 332 i--; 333 if (mat->rmap->n) { 334 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 if (rank) { 337 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 338 } 339 } 340 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 341 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* 346 Local utility routine that creates a mapping from the global column 347 number to the local number in the off-diagonal part of the local 348 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 349 a slightly higher hash table cost; without it it is not scalable (each processor 350 has an order N integer array but is fast to acess. 
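   For example, if garray = {3,17,42} then the map sends global column 3 to 1, 17 to 2
   and 42 to 3 (the local column index in the off-diagonal block plus one); a lookup
   that returns 0 means the global column does not occur in the off-diagonal block on
   this process.)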
351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 404 ap1[_i] = value; \ 405 A->nonzerostate++;\ 406 a_noinsert: ; \ 407 ailen[row] = nrow1; \ 408 } 409 410 411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 412 { \ 413 if (col <= lastcol2) low2 = 0; \ 414 else high2 = nrow2; \ 415 lastcol2 = col; \ 416 while (high2-low2 > 5) { \ 417 t = (low2+high2)/2; \ 418 if (rp2[t] > col) high2 = t; \ 419 else low2 = t; \ 420 } \ 421 for (_i=low2; _i<high2; _i++) { \ 422 if (rp2[_i] > col) break; \ 423 if (rp2[_i] == col) { \ 424 if (addv == ADD_VALUES) ap2[_i] += value; \ 425 else ap2[_i] = value; \ 426 goto b_noinsert; \ 427 } \ 428 } \ 429 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 432 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 433 N = nrow2++ - 1; b->nz++; high2++; \ 434 /* shift up all the later entries in this row */ \ 435 for (ii=N; ii>=_i; ii--) { \ 436 rp2[ii+1] = rp2[ii]; \ 437 ap2[ii+1] = ap2[ii]; \ 438 } \ 439 rp2[_i] = col; \ 440 ap2[_i] = value; \ 441 B->nonzerostate++; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
(Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 for (i=0; i<m; i++) { 502 if (im[i] < 0) continue; 503 #if defined(PETSC_USE_DEBUG) 504 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 505 #endif 506 if (im[i] >= rstart && im[i] < rend) { 507 row = im[i] - rstart; 508 lastcol1 = -1; 509 rp1 = aj + ai[row]; 510 ap1 = aa + ai[row]; 511 rmax1 = aimax[row]; 512 nrow1 = ailen[row]; 513 low1 = 0; 514 high1 = nrow1; 515 lastcol2 = -1; 516 rp2 = bj + bi[row]; 517 ap2 = ba + bi[row]; 518 rmax2 = bimax[row]; 519 nrow2 = bilen[row]; 520 low2 = 0; 521 high2 = nrow2; 522 523 for (j=0; j<n; j++) { 524 if (roworiented) value = v[i*n+j]; 525 else value = v[i+j*m]; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend) { 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 } 540 #if defined(PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if (roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend) { 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined(PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col--; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 PetscFunctionReturn(0); 622 } 623 624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt nstash,reallocs; 633 InsertMode addv; 634 635 PetscFunctionBegin; 636 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 637 638 /* make sure all processors are either in INSERTMODE or 
ADDMODE */ 639 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 641 mat->insertmode = addv; /* in case this processor had no cache */ 642 643 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 644 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 645 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 652 { 653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 654 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 655 PetscErrorCode ierr; 656 PetscMPIInt n; 657 PetscInt i,j,rstart,ncols,flg; 658 PetscInt *row,*col; 659 PetscBool other_disassembled; 660 PetscScalar *val; 661 InsertMode addv = mat->insertmode; 662 663 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 664 665 PetscFunctionBegin; 666 if (!aij->donotstash && !mat->nooffprocentries) { 667 while (1) { 668 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 669 if (!flg) break; 670 671 for (i=0; i<n; ) { 672 /* Now identify the consecutive vals belonging to the same row */ 673 for (j=i,rstart=row[j]; j<n; j++) { 674 if (row[j] != rstart) break; 675 } 676 if (j < n) ncols = j-i; 677 else ncols = n-i; 678 /* Now assemble all these values with a single function call */ 679 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 680 681 i = j; 682 } 683 } 684 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 685 } 686 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 687 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 688 689 /* determine if any processor has disassembled, if so we must 690 also disassemble ourselfs, in order that we may reassemble. 
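   (The MPI_PROD reduction of was_assembled below yields PETSC_FALSE as soon as one
   process is no longer marked assembled; every process that is still assembled then
   calls MatDisAssemble_MPIAIJ() as well, so all processes rebuild a consistent
   off-diagonal structure.)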
*/ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 707 708 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 709 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 719 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 720 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 721 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 722 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 730 { 731 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 736 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatZeroRows_MPIAIJ" 742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 743 { 744 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 745 PetscInt *owners = A->rmap->range; 746 PetscInt n = A->rmap->n; 747 PetscMPIInt size = mat->size; 748 PetscSF sf; 749 PetscInt *lrows; 750 PetscSFNode *rrows; 751 PetscInt lastidx = -1, r, p = 0, len = 0; 752 PetscErrorCode ierr; 753 754 PetscFunctionBegin; 755 /* Create SF where leaves are input rows and roots are owned rows */ 756 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 757 for (r = 0; r < n; ++r) lrows[r] = -1; 758 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 759 for (r = 0; r < N; ++r) { 760 const PetscInt idx = rows[r]; 761 PetscBool found = PETSC_FALSE; 762 /* Trick for efficient searching for sorted rows */ 763 if (lastidx > idx) p = 0; 764 lastidx = idx; 765 for (; p < size; ++p) { 766 if (idx >= owners[p] && idx < owners[p+1]) { 767 rrows[r].rank = p; 768 rrows[r].index = rows[r] - owners[p]; 769 found = PETSC_TRUE; 770 break; 771 } 772 } 773 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 774 } 775 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 776 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 777 /* Collect flags for rows to be zeroed */ 778 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 779 ierr = PetscSFReduceEnd(sf, 
MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 780 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 781 /* Compress and put in row numbers */ 782 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 783 /* fix right hand side if needed */ 784 if (x && b) { 785 const PetscScalar *xx; 786 PetscScalar *bb; 787 788 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 789 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 790 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 791 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 792 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 793 } 794 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 795 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 796 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 797 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 798 } else if (diag != 0.0) { 799 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 800 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 801 for (r = 0; r < len; ++r) { 802 const PetscInt row = lrows[r] + A->rmap->rstart; 803 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 804 } 805 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 } else { 808 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(lrows);CHKERRQ(ierr); 811 812 /* only change matrix nonzero state if pattern was allowed to be changed */ 813 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 814 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 815 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 822 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 823 { 824 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 825 PetscErrorCode ierr; 826 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 827 PetscInt i,j,r,m,p = 0,len = 0; 828 PetscInt *lrows,*owners = A->rmap->range; 829 PetscSFNode *rrows; 830 PetscSF sf; 831 const PetscScalar *xx; 832 PetscScalar *bb,*mask; 833 Vec xmask,lmask; 834 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 835 const PetscInt *aj, *ii,*ridx; 836 PetscScalar *aa; 837 #if defined(PETSC_DEBUG) 838 PetscBool found = PETSC_FALSE; 839 #endif 840 841 PetscFunctionBegin; 842 /* Create SF where leaves are input rows and roots are owned rows */ 843 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 844 for (r = 0; r < n; ++r) lrows[r] = -1; 845 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 846 for (r = 0; r < N; ++r) { 847 const PetscInt idx = rows[r]; 848 PetscBool found = PETSC_FALSE; 849 /* Trick for efficient searching for sorted rows */ 850 if (lastidx > idx) p = 0; 851 lastidx = idx; 852 for (; p < size; ++p) { 853 if (idx >= owners[p] && idx < owners[p+1]) { 854 rrows[r].rank = p; 855 rrows[r].index = rows[r] - owners[p]; 856 found = PETSC_TRUE; 857 break; 858 } 859 } 860 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 861 } 862 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 863 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 864 /* Collect flags for rows to be zeroed */ 865 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 866 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 867 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 868 /* Compress and put in row numbers */ 869 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 870 /* zero diagonal part of matrix */ 871 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 872 /* handle off diagonal part of matrix */ 873 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 874 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 875 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 876 for (i=0; i<len; i++) bb[lrows[i]] = 1; 877 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 878 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 879 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 881 if (x) { 882 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 883 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 884 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 885 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 886 } 887 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 888 /* remove zeroed rows of off diagonal matrix */ 889 ii = aij->i; 890 for (i=0; i<len; i++) { 891 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 892 } 893 /* loop over all elements of off process part of matrix zeroing removed columns*/ 894 if (aij->compressedrow.use) { 895 m = aij->compressedrow.nrows; 896 ii = aij->compressedrow.i; 897 ridx = aij->compressedrow.rindex; 898 for (i=0; i<m; i++) { 899 n = ii[i+1] - ii[i]; 900 aj = aij->j + ii[i]; 901 aa = aij->a + ii[i]; 902 903 for (j=0; j<n; j++) { 904 if (PetscAbsScalar(mask[*aj])) { 905 if (b) bb[*ridx] -= *aa*xx[*aj]; 906 *aa = 0.0; 907 } 908 aa++; 909 aj++; 910 } 911 ridx++; 912 } 913 } else { /* do not use compressed row format */ 914 m = l->B->rmap->n; 915 for (i=0; i<m; i++) { 916 n = ii[i+1] - ii[i]; 917 aj = aij->j + ii[i]; 918 aa = aij->a + ii[i]; 919 for (j=0; j<n; j++) { 920 if (PetscAbsScalar(mask[*aj])) { 921 if (b) bb[i] -= *aa*xx[*aj]; 922 *aa = 0.0; 923 } 924 aa++; 925 aj++; 926 } 927 } 928 } 929 if (x) { 930 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 931 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 932 } 933 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 934 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 935 ierr = PetscFree(lrows);CHKERRQ(ierr); 936 937 /* only change matrix nonzero state if pattern was allowed to be changed */ 938 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 939 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 940 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 941 } 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatMult_MPIAIJ" 947 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 948 { 949 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 950 PetscErrorCode ierr; 951 PetscInt nt; 952 953 PetscFunctionBegin; 954 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 955 if (nt != A->cmap->n) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 956 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 957 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 958 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 965 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 966 { 967 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 968 PetscErrorCode ierr; 969 970 PetscFunctionBegin; 971 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatMultAdd_MPIAIJ" 977 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 978 { 979 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 980 PetscErrorCode ierr; 981 982 PetscFunctionBegin; 983 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 985 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 986 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 992 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 993 { 994 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 995 PetscErrorCode ierr; 996 PetscBool merged; 997 998 PetscFunctionBegin; 999 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1000 /* do nondiagonal part */ 1001 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1002 if (!merged) { 1003 /* send it on its way */ 1004 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1005 /* do local part */ 1006 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1007 /* receive remote parts: note this assumes the values are not actually */ 1008 /* added in yy until the next line, */ 1009 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1010 } else { 1011 /* do local part */ 1012 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1013 /* send it on its way */ 1014 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1015 /* values actually were received in the Begin() but we need to call this nop */ 1016 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1017 } 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1023 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1024 { 1025 MPI_Comm comm; 1026 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1027 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1028 IS Me,Notme; 1029 PetscErrorCode ierr; 1030 PetscInt M,N,first,last,*notme,i; 1031 PetscMPIInt size; 1032 1033 PetscFunctionBegin; 1034 /* Easy test: symmetric diagonal block */ 1035 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1036 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1037 if (!*f) PetscFunctionReturn(0); 1038 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1039 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1040 if (size == 1) PetscFunctionReturn(0); 1041 1042 /* Hard test: off-diagonal block. 
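   Each process extracts its owned rows restricted to the columns it does not own from
   Amat, and the mirror block (unowned rows restricted to owned columns) from Bmat, and
   checks that the first block is the transpose of the second to within tol.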
This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr =
PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1130 #else 1131 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1132 #endif 1133 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1134 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1135 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1136 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1137 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1138 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1139 1140 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1142 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1145 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1146 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1154 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1155 { 1156 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1157 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1158 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1159 PetscErrorCode ierr; 1160 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1161 int fd; 1162 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1163 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1164 PetscScalar *column_values; 1165 PetscInt message_count,flowcontrolcount; 1166 FILE *file; 1167 1168 PetscFunctionBegin; 1169 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1170 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1171 nz = A->nz + B->nz; 1172 if (!rank) { 1173 header[0] = MAT_FILE_CLASSID; 1174 header[1] = mat->rmap->N; 1175 header[2] = mat->cmap->N; 1176 1177 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1178 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1179 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1180 /* get largest number of rows any processor has */ 1181 rlen = mat->rmap->n; 1182 range = mat->rmap->range; 1183 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1184 } else { 1185 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1186 rlen = mat->rmap->n; 1187 } 1188 1189 /* load up the local row counts */ 1190 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1191 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1192 1193 /* store the row lengths to the file */ 1194 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1195 if (!rank) { 1196 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1197 for (i=1; i<size; i++) { 1198 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 
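        /* receive each process's local row lengths in turn and append them to the file;
           the flow control above throttles the senders, so rank 0 only ever holds one
           process's row lengths at a time */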
1199 rlen = range[i+1] - range[i]; 1200 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1201 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1202 } 1203 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1204 } else { 1205 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1206 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1207 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1208 } 1209 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1210 1211 /* load up the local column indices */ 1212 nzmax = nz; /* th processor needs space a largest processor needs */ 1213 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1214 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1215 cnt = 0; 1216 for (i=0; i<mat->rmap->n; i++) { 1217 for (j=B->i[i]; j<B->i[i+1]; j++) { 1218 if ((col = garray[B->j[j]]) > cstart) break; 1219 column_indices[cnt++] = col; 1220 } 1221 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1222 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1223 } 1224 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1225 1226 /* store the column indices to the file */ 1227 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1228 if (!rank) { 1229 MPI_Status status; 1230 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1231 for (i=1; i<size; i++) { 1232 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1233 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1234 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1235 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1236 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1237 } 1238 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1241 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1242 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1243 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1244 } 1245 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1246 1247 /* load up the local column values */ 1248 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1249 cnt = 0; 1250 for (i=0; i<mat->rmap->n; i++) { 1251 for (j=B->i[i]; j<B->i[i+1]; j++) { 1252 if (garray[B->j[j]] > cstart) break; 1253 column_values[cnt++] = B->a[j]; 1254 } 1255 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1256 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1257 } 1258 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1259 1260 /* store the column values to the file */ 1261 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1262 if (!rank) { 1263 MPI_Status status; 
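      /* same flow-controlled pattern as for the column indices: rank 0 writes its own
         nonzero values, then for every other process receives the count followed by the
         values themselves (MPIULong_Recv handles messages longer than an int count allows)
         and appends them to the file */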
1264 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1265 for (i=1; i<size; i++) { 1266 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1267 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1268 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1269 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1271 } 1272 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1273 } else { 1274 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1275 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1276 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1277 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1278 } 1279 ierr = PetscFree(column_values);CHKERRQ(ierr); 1280 1281 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1282 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #include <petscdraw.h> 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1289 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1290 { 1291 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1292 PetscErrorCode ierr; 1293 PetscMPIInt rank = aij->rank,size = aij->size; 1294 PetscBool isdraw,iascii,isbinary; 1295 PetscViewer sviewer; 1296 PetscViewerFormat format; 1297 1298 PetscFunctionBegin; 1299 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1300 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1301 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1302 if (iascii) { 1303 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1304 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1305 MatInfo info; 1306 PetscBool inodes; 1307 1308 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1309 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1310 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1312 if (!inodes) { 1313 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1314 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1315 } else { 1316 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1317 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1318 } 1319 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1320 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1321 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1322 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1323 ierr = 
PetscViewerFlush(viewer);CHKERRQ(ierr); 1324 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1325 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1326 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1329 PetscInt inodecount,inodelimit,*inodes; 1330 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1331 if (inodes) { 1332 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1333 } else { 1334 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1335 } 1336 PetscFunctionReturn(0); 1337 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1338 PetscFunctionReturn(0); 1339 } 1340 } else if (isbinary) { 1341 if (size == 1) { 1342 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1343 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1344 } else { 1345 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1346 } 1347 PetscFunctionReturn(0); 1348 } else if (isdraw) { 1349 PetscDraw draw; 1350 PetscBool isnull; 1351 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1352 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1353 } 1354 1355 { 1356 /* assemble the entire matrix onto first processor. */ 1357 Mat A; 1358 Mat_SeqAIJ *Aloc; 1359 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1360 MatScalar *a; 1361 1362 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1363 if (!rank) { 1364 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1365 } else { 1366 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1367 } 1368 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1369 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1370 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1371 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1372 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1373 1374 /* copy over the A part */ 1375 Aloc = (Mat_SeqAIJ*)aij->A->data; 1376 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1377 row = mat->rmap->rstart; 1378 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1379 for (i=0; i<m; i++) { 1380 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1381 row++; 1382 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1383 } 1384 aj = Aloc->j; 1385 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1386 1387 /* copy over the B part */ 1388 Aloc = (Mat_SeqAIJ*)aij->B->data; 1389 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1390 row = mat->rmap->rstart; 1391 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1392 ct = cols; 1393 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1394 for (i=0; i<m; i++) { 1395 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1396 row++; 1397 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1398 } 1399 ierr = PetscFree(ct);CHKERRQ(ierr); 1400 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1401 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1402 /* 1403 Everyone has to call to draw the matrix since the graphics waits are 1404 synchronized across all processors that share the 
PetscDraw object 1405 */ 1406 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1407 if (!rank) { 1408 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1409 } 1410 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1411 ierr = MatDestroy(&A);CHKERRQ(ierr); 1412 } 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatView_MPIAIJ" 1418 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1419 { 1420 PetscErrorCode ierr; 1421 PetscBool iascii,isdraw,issocket,isbinary; 1422 1423 PetscFunctionBegin; 1424 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1425 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1426 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1427 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1428 if (iascii || isdraw || isbinary || issocket) { 1429 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatSOR_MPIAIJ" 1436 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1437 { 1438 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1439 PetscErrorCode ierr; 1440 Vec bb1 = 0; 1441 PetscBool hasop; 1442 1443 PetscFunctionBegin; 1444 if (flag == SOR_APPLY_UPPER) { 1445 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1450 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1451 } 1452 1453 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1454 if (flag & SOR_ZERO_INITIAL_GUESS) { 1455 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1456 its--; 1457 } 1458 1459 while (its--) { 1460 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1461 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1462 1463 /* update rhs: bb1 = bb - B*x */ 1464 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1465 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1466 1467 /* local sweep */ 1468 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1469 } 1470 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1471 if (flag & SOR_ZERO_INITIAL_GUESS) { 1472 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1473 its--; 1474 } 1475 while (its--) { 1476 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1477 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1478 1479 /* update rhs: bb1 = bb - B*x */ 1480 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1481 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1482 1483 /* local sweep */ 1484 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1485 } 1486 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1487 if (flag & SOR_ZERO_INITIAL_GUESS) { 1488 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1489 its--; 1490 } 1491 while (its--) { 1492 ierr = 
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1493 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1494 1495 /* update rhs: bb1 = bb - B*x */ 1496 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1497 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1498 1499 /* local sweep */ 1500 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1501 } 1502 } else if (flag & SOR_EISENSTAT) { 1503 Vec xx1; 1504 1505 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1506 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1507 1508 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1509 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1510 if (!mat->diag) { 1511 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1512 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1513 } 1514 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1515 if (hasop) { 1516 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1517 } else { 1518 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1519 } 1520 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1521 1522 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1523 1524 /* local sweep */ 1525 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1526 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1527 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1528 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1529 1530 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatPermute_MPIAIJ" 1536 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1537 { 1538 Mat aA,aB,Aperm; 1539 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1540 PetscScalar *aa,*ba; 1541 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1542 PetscSF rowsf,sf; 1543 IS parcolp = NULL; 1544 PetscBool done; 1545 PetscErrorCode ierr; 1546 1547 PetscFunctionBegin; 1548 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1549 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1550 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1551 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1552 1553 /* Invert row permutation to find out where my rows should go */ 1554 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1555 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1556 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1557 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1558 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1559 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1560 1561 /* Invert column permutation to find out where my columns should go */ 1562 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1563 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1564 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1565 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1566 ierr = 
PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1567 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1568 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1569 1570 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1571 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1572 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1573 1574 /* Find out where my gcols should go */ 1575 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1576 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1577 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1578 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1579 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1580 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1581 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1582 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1583 1584 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1585 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1586 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1587 for (i=0; i<m; i++) { 1588 PetscInt row = rdest[i],rowner; 1589 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1590 for (j=ai[i]; j<ai[i+1]; j++) { 1591 PetscInt cowner,col = cdest[aj[j]]; 1592 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1593 if (rowner == cowner) dnnz[i]++; 1594 else onnz[i]++; 1595 } 1596 for (j=bi[i]; j<bi[i+1]; j++) { 1597 PetscInt cowner,col = gcdest[bj[j]]; 1598 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1599 if (rowner == cowner) dnnz[i]++; 1600 else onnz[i]++; 1601 } 1602 } 1603 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1604 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1605 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1606 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1607 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1608 1609 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1610 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1611 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1612 for (i=0; i<m; i++) { 1613 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1614 PetscInt j0,rowlen; 1615 rowlen = ai[i+1] - ai[i]; 1616 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1617 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1618 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1619 } 1620 rowlen = bi[i+1] - bi[i]; 1621 for (j0=j=0; j<rowlen; j0=j) { 1622 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1623 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1624 } 1625 } 1626 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1627 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1628 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1629 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1630 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1631 ierr = 
MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1632 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1633 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1634 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1635 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1636 *B = Aperm; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1642 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1643 { 1644 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1645 Mat A = mat->A,B = mat->B; 1646 PetscErrorCode ierr; 1647 PetscReal isend[5],irecv[5]; 1648 1649 PetscFunctionBegin; 1650 info->block_size = 1.0; 1651 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1652 1653 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1654 isend[3] = info->memory; isend[4] = info->mallocs; 1655 1656 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1657 1658 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1659 isend[3] += info->memory; isend[4] += info->mallocs; 1660 if (flag == MAT_LOCAL) { 1661 info->nz_used = isend[0]; 1662 info->nz_allocated = isend[1]; 1663 info->nz_unneeded = isend[2]; 1664 info->memory = isend[3]; 1665 info->mallocs = isend[4]; 1666 } else if (flag == MAT_GLOBAL_MAX) { 1667 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1668 1669 info->nz_used = irecv[0]; 1670 info->nz_allocated = irecv[1]; 1671 info->nz_unneeded = irecv[2]; 1672 info->memory = irecv[3]; 1673 info->mallocs = irecv[4]; 1674 } else if (flag == MAT_GLOBAL_SUM) { 1675 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1676 1677 info->nz_used = irecv[0]; 1678 info->nz_allocated = irecv[1]; 1679 info->nz_unneeded = irecv[2]; 1680 info->memory = irecv[3]; 1681 info->mallocs = irecv[4]; 1682 } 1683 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1684 info->fill_ratio_needed = 0; 1685 info->factor_mallocs = 0; 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatSetOption_MPIAIJ" 1691 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1692 { 1693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 switch (op) { 1698 case MAT_NEW_NONZERO_LOCATIONS: 1699 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1700 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1701 case MAT_KEEP_NONZERO_PATTERN: 1702 case MAT_NEW_NONZERO_LOCATION_ERR: 1703 case MAT_USE_INODES: 1704 case MAT_IGNORE_ZERO_ENTRIES: 1705 MatCheckPreallocated(A,1); 1706 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1707 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1708 break; 1709 case MAT_ROW_ORIENTED: 1710 a->roworiented = flg; 1711 1712 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1713 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1714 break; 1715 case MAT_NEW_DIAGONALS: 1716 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1717 break; 1718 case MAT_IGNORE_OFF_PROC_ENTRIES: 1719 a->donotstash = flg; 1720 break; 1721 case MAT_SPD: 1722 A->spd_set = PETSC_TRUE; 1723 A->spd = flg; 1724 if (flg) { 1725 A->symmetric = PETSC_TRUE; 1726 A->structurally_symmetric = PETSC_TRUE; 1727 A->symmetric_set = PETSC_TRUE; 1728 A->structurally_symmetric_set = PETSC_TRUE; 1729 } 1730 break; 1731 case MAT_SYMMETRIC: 1732 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1733 break; 1734 case 
MAT_STRUCTURALLY_SYMMETRIC: 1735 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1736 break; 1737 case MAT_HERMITIAN: 1738 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1739 break; 1740 case MAT_SYMMETRY_ETERNAL: 1741 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1742 break; 1743 default: 1744 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1745 } 1746 PetscFunctionReturn(0); 1747 } 1748 1749 #undef __FUNCT__ 1750 #define __FUNCT__ "MatGetRow_MPIAIJ" 1751 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1752 { 1753 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1754 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1755 PetscErrorCode ierr; 1756 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1757 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1758 PetscInt *cmap,*idx_p; 1759 1760 PetscFunctionBegin; 1761 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1762 mat->getrowactive = PETSC_TRUE; 1763 1764 if (!mat->rowvalues && (idx || v)) { 1765 /* 1766 allocate enough space to hold information from the longest row. 1767 */ 1768 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1769 PetscInt max = 1,tmp; 1770 for (i=0; i<matin->rmap->n; i++) { 1771 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1772 if (max < tmp) max = tmp; 1773 } 1774 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1775 } 1776 1777 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1778 lrow = row - rstart; 1779 1780 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1781 if (!v) {pvA = 0; pvB = 0;} 1782 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1783 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1784 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1785 nztot = nzA + nzB; 1786 1787 cmap = mat->garray; 1788 if (v || idx) { 1789 if (nztot) { 1790 /* Sort by increasing column numbers, assuming A and B already sorted */ 1791 PetscInt imark = -1; 1792 if (v) { 1793 *v = v_p = mat->rowvalues; 1794 for (i=0; i<nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1796 else break; 1797 } 1798 imark = i; 1799 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1800 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1801 } 1802 if (idx) { 1803 *idx = idx_p = mat->rowindices; 1804 if (imark > -1) { 1805 for (i=0; i<imark; i++) { 1806 idx_p[i] = cmap[cworkB[i]]; 1807 } 1808 } else { 1809 for (i=0; i<nzB; i++) { 1810 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1811 else break; 1812 } 1813 imark = i; 1814 } 1815 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1816 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1817 } 1818 } else { 1819 if (idx) *idx = 0; 1820 if (v) *v = 0; 1821 } 1822 } 1823 *nz = nztot; 1824 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1825 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1831 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1832 { 1833 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1834 1835 PetscFunctionBegin; 1836 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1837 
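/*
   The getrowactive flag cleared below enforces the pairing required by the row-access interface:
   every MatGetRow() must be matched by a MatRestoreRow() on the same row before another row is
   requested. A minimal caller-side sketch, with the matrix Amat and the loop purely illustrative
   (not taken from this file):

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(Amat,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {            (only locally owned rows may be queried)
        ierr = MatGetRow(Amat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        (use ncols, cols[], vals[] here; column indices are global and sorted increasingly)
        ierr = MatRestoreRow(Amat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/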
aij->getrowactive = PETSC_FALSE; 1838 PetscFunctionReturn(0); 1839 } 1840 1841 #undef __FUNCT__ 1842 #define __FUNCT__ "MatNorm_MPIAIJ" 1843 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1844 { 1845 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1846 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1847 PetscErrorCode ierr; 1848 PetscInt i,j,cstart = mat->cmap->rstart; 1849 PetscReal sum = 0.0; 1850 MatScalar *v; 1851 1852 PetscFunctionBegin; 1853 if (aij->size == 1) { 1854 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1855 } else { 1856 if (type == NORM_FROBENIUS) { 1857 v = amat->a; 1858 for (i=0; i<amat->nz; i++) { 1859 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1860 } 1861 v = bmat->a; 1862 for (i=0; i<bmat->nz; i++) { 1863 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1864 } 1865 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1866 *norm = PetscSqrtReal(*norm); 1867 } else if (type == NORM_1) { /* max column norm */ 1868 PetscReal *tmp,*tmp2; 1869 PetscInt *jj,*garray = aij->garray; 1870 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1871 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1872 *norm = 0.0; 1873 v = amat->a; jj = amat->j; 1874 for (j=0; j<amat->nz; j++) { 1875 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1876 } 1877 v = bmat->a; jj = bmat->j; 1878 for (j=0; j<bmat->nz; j++) { 1879 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1880 } 1881 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1882 for (j=0; j<mat->cmap->N; j++) { 1883 if (tmp2[j] > *norm) *norm = tmp2[j]; 1884 } 1885 ierr = PetscFree(tmp);CHKERRQ(ierr); 1886 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1887 } else if (type == NORM_INFINITY) { /* max row norm */ 1888 PetscReal ntemp = 0.0; 1889 for (j=0; j<aij->A->rmap->n; j++) { 1890 v = amat->a + amat->i[j]; 1891 sum = 0.0; 1892 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1893 sum += PetscAbsScalar(*v); v++; 1894 } 1895 v = bmat->a + bmat->i[j]; 1896 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1897 sum += PetscAbsScalar(*v); v++; 1898 } 1899 if (sum > ntemp) ntemp = sum; 1900 } 1901 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1902 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1903 } 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #undef __FUNCT__ 1908 #define __FUNCT__ "MatTranspose_MPIAIJ" 1909 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1910 { 1911 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1912 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1913 PetscErrorCode ierr; 1914 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1915 PetscInt cstart = A->cmap->rstart,ncol; 1916 Mat B; 1917 MatScalar *array; 1918 1919 PetscFunctionBegin; 1920 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1921 1922 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1923 ai = Aloc->i; aj = Aloc->j; 1924 bi = Bloc->i; bj = Bloc->j; 1925 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1926 PetscInt *d_nnz,*g_nnz,*o_nnz; 1927 PetscSFNode *oloc; 1928 PETSC_UNUSED PetscSF sf; 1929 1930 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1931 /* compute d_nnz for 
preallocation */ 1932 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1933 for (i=0; i<ai[ma]; i++) { 1934 d_nnz[aj[i]]++; 1935 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1936 } 1937 /* compute local off-diagonal contributions */ 1938 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1939 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1940 /* map those to global */ 1941 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1942 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1943 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1944 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1945 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1946 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1947 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1948 1949 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1950 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1951 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1952 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1953 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1954 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1955 } else { 1956 B = *matout; 1957 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1958 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1959 } 1960 1961 /* copy over the A part */ 1962 array = Aloc->a; 1963 row = A->rmap->rstart; 1964 for (i=0; i<ma; i++) { 1965 ncol = ai[i+1]-ai[i]; 1966 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1967 row++; 1968 array += ncol; aj += ncol; 1969 } 1970 aj = Aloc->j; 1971 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1972 1973 /* copy over the B part */ 1974 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1975 array = Bloc->a; 1976 row = A->rmap->rstart; 1977 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1978 cols_tmp = cols; 1979 for (i=0; i<mb; i++) { 1980 ncol = bi[i+1]-bi[i]; 1981 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1982 row++; 1983 array += ncol; cols_tmp += ncol; 1984 } 1985 ierr = PetscFree(cols);CHKERRQ(ierr); 1986 1987 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1989 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1990 *matout = B; 1991 } else { 1992 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1993 } 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1999 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 Mat a = aij->A,b = aij->B; 2003 PetscErrorCode ierr; 2004 PetscInt s1,s2,s3; 2005 2006 PetscFunctionBegin; 2007 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2008 if (rr) { 2009 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2010 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2011 /* Overlap communication with computation. 
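The scatter that gathers the right-scaling values rr into aij->lvec is only started here; the
       purely local work (left scaling of the off-diagonal block and scaling of the diagonal block)
       is carried out while those messages are in flight, and the scatter is completed below,
       immediately before the off-diagonal block is right scaled with the gathered values.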
*/ 2012 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2013 } 2014 if (ll) { 2015 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2016 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2017 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2018 } 2019 /* scale the diagonal block */ 2020 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2021 2022 if (rr) { 2023 /* Do a scatter end and then right scale the off-diagonal block */ 2024 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2026 } 2027 PetscFunctionReturn(0); 2028 } 2029 2030 #undef __FUNCT__ 2031 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2032 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2033 { 2034 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2035 PetscErrorCode ierr; 2036 2037 PetscFunctionBegin; 2038 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatEqual_MPIAIJ" 2044 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2045 { 2046 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2047 Mat a,b,c,d; 2048 PetscBool flg; 2049 PetscErrorCode ierr; 2050 2051 PetscFunctionBegin; 2052 a = matA->A; b = matA->B; 2053 c = matB->A; d = matB->B; 2054 2055 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2056 if (flg) { 2057 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2058 } 2059 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2060 PetscFunctionReturn(0); 2061 } 2062 2063 #undef __FUNCT__ 2064 #define __FUNCT__ "MatCopy_MPIAIJ" 2065 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2066 { 2067 PetscErrorCode ierr; 2068 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2069 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2070 2071 PetscFunctionBegin; 2072 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2073 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2074 /* because of the column compression in the off-processor part of the matrix a->B, 2075 the number of columns in a->B and b->B may be different, hence we cannot call 2076 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2077 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2078 then copying the submatrices */ 2079 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2080 } else { 2081 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2082 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatSetUp_MPIAIJ" 2089 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2090 { 2091 PetscErrorCode ierr; 2092 2093 PetscFunctionBegin; 2094 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2095 PetscFunctionReturn(0); 2096 } 2097 2098 #undef __FUNCT__ 2099 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2100 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2101 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2102 { 2103 PetscInt i,m=Y->rmap->N; 2104 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2105 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2106 const PetscInt *xi = x->i,*yi = y->i; 2107 2108 PetscFunctionBegin; 2109 /* Set the number of nonzeros in the new matrix */ 2110 for (i=0; i<m; i++) { 2111 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2112 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2113 nnz[i] = 0; 2114 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2115 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2116 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2117 nnz[i]++; 2118 } 2119 for (; k<nzy; k++) nnz[i]++; 2120 } 2121 PetscFunctionReturn(0); 2122 } 2123 2124 #undef __FUNCT__ 2125 #define __FUNCT__ "MatAXPY_MPIAIJ" 2126 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2127 { 2128 PetscErrorCode ierr; 2129 PetscInt i; 2130 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2131 PetscBLASInt bnz,one=1; 2132 Mat_SeqAIJ *x,*y; 2133 2134 PetscFunctionBegin; 2135 if (str == SAME_NONZERO_PATTERN) { 2136 PetscScalar alpha = a; 2137 x = (Mat_SeqAIJ*)xx->A->data; 2138 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2139 y = (Mat_SeqAIJ*)yy->A->data; 2140 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2141 x = (Mat_SeqAIJ*)xx->B->data; 2142 y = (Mat_SeqAIJ*)yy->B->data; 2143 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2144 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2145 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2146 } else if (str == SUBSET_NONZERO_PATTERN) { 2147 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2148 2149 x = (Mat_SeqAIJ*)xx->B->data; 2150 y = (Mat_SeqAIJ*)yy->B->data; 2151 if (y->xtoy && y->XtoY != xx->B) { 2152 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2153 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2154 } 2155 if (!y->xtoy) { /* get xtoy */ 2156 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2157 y->XtoY = xx->B; 2158 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2159 } 2160 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2161 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2162 } else { 2163 Mat B; 2164 PetscInt *nnz_d,*nnz_o; 2165 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2166 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2167 ierr = 
MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2168 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2169 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2170 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2171 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2172 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2173 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2174 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2175 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2176 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2177 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2178 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2179 } 2180 PetscFunctionReturn(0); 2181 } 2182 2183 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2184 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatConjugate_MPIAIJ" 2187 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2188 { 2189 #if defined(PETSC_USE_COMPLEX) 2190 PetscErrorCode ierr; 2191 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2192 2193 PetscFunctionBegin; 2194 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2195 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2196 #else 2197 PetscFunctionBegin; 2198 #endif 2199 PetscFunctionReturn(0); 2200 } 2201 2202 #undef __FUNCT__ 2203 #define __FUNCT__ "MatRealPart_MPIAIJ" 2204 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2205 { 2206 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2207 PetscErrorCode ierr; 2208 2209 PetscFunctionBegin; 2210 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2211 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2212 PetscFunctionReturn(0); 2213 } 2214 2215 #undef __FUNCT__ 2216 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2217 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2218 { 2219 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2220 PetscErrorCode ierr; 2221 2222 PetscFunctionBegin; 2223 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2224 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2225 PetscFunctionReturn(0); 2226 } 2227 2228 #if defined(PETSC_HAVE_PBGL) 2229 2230 #include <boost/parallel/mpi/bsp_process_group.hpp> 2231 #include <boost/graph/distributed/ilu_default_graph.hpp> 2232 #include <boost/graph/distributed/ilu_0_block.hpp> 2233 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2234 #include <boost/graph/distributed/petsc/interface.hpp> 2235 #include <boost/multi_array.hpp> 2236 #include <boost/parallel/distributed_property_map->hpp> 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2240 /* 2241 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2242 */ 2243 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2244 { 2245 namespace petsc = boost::distributed::petsc; 2246 2247 namespace graph_dist = boost::graph::distributed; 2248 using boost::graph::distributed::ilu_default::process_group_type; 2249 using boost::graph::ilu_permuted; 2250 2251 PetscBool row_identity, col_identity; 2252 PetscContainer c; 2253 PetscInt m, n, M, N; 2254 PetscErrorCode ierr; 2255 2256 PetscFunctionBegin; 2257 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2258 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2259 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2260 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be 
identity for parallel ILU"); 2261 2262 process_group_type pg; 2263 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2264 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2265 lgraph_type& level_graph = *lgraph_p; 2266 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2267 2268 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2269 ilu_permuted(level_graph); 2270 2271 /* put together the new matrix */ 2272 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2273 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2274 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2275 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2276 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2277 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2278 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2279 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2280 2281 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2282 ierr = PetscContainerSetPointer(c, lgraph_p); 2283 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2284 ierr = PetscContainerDestroy(&c); 2285 PetscFunctionReturn(0); 2286 } 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2290 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2291 { 2292 PetscFunctionBegin; 2293 PetscFunctionReturn(0); 2294 } 2295 2296 #undef __FUNCT__ 2297 #define __FUNCT__ "MatSolve_MPIAIJ" 2298 /* 2299 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2300 */ 2301 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2302 { 2303 namespace graph_dist = boost::graph::distributed; 2304 2305 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2306 lgraph_type *lgraph_p; 2307 PetscContainer c; 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2312 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2313 ierr = VecCopy(b, x);CHKERRQ(ierr); 2314 2315 PetscScalar *array_x; 2316 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2317 PetscInt sx; 2318 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2319 2320 PetscScalar *array_b; 2321 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2322 PetscInt sb; 2323 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2324 2325 lgraph_type& level_graph = *lgraph_p; 2326 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2327 2328 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2329 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2330 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2331 2332 typedef boost::iterator_property_map<array_ref_type::iterator, 2333 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2334 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2335 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2336 2337 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2338 PetscFunctionReturn(0); 2339 } 2340 #endif 2341 2342 #undef __FUNCT__ 2343 #define __FUNCT__ "MatDestroy_MatRedundant" 2344 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2345 { 2346 PetscErrorCode ierr; 2347 Mat_Redundant *redund; 2348 PetscInt i; 2349 PetscMPIInt size; 2350 2351 PetscFunctionBegin; 2352 ierr = 
MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2353 if (size == 1) { 2354 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2355 redund = a->redundant; 2356 } else { 2357 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2358 redund = a->redundant; 2359 } 2360 if (redund){ 2361 if (redund->matseq) { /* via MatGetSubMatrices() */ 2362 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2363 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2364 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2365 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2366 } else { 2367 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2368 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2369 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2370 for (i=0; i<redund->nrecvs; i++) { 2371 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2372 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2373 } 2374 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2375 } 2376 2377 if (redund->psubcomm) { 2378 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2379 } 2380 ierr = redund->Destroy(A);CHKERRQ(ierr); 2381 ierr = PetscFree(redund);CHKERRQ(ierr); 2382 } 2383 PetscFunctionReturn(0); 2384 } 2385 2386 #undef __FUNCT__ 2387 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2388 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2389 { 2390 PetscMPIInt rank,size; 2391 MPI_Comm comm; 2392 PetscErrorCode ierr; 2393 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2394 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2395 PetscInt *rowrange = mat->rmap->range; 2396 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2397 Mat A = aij->A,B=aij->B,C=*matredundant; 2398 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2399 PetscScalar *sbuf_a; 2400 PetscInt nzlocal=a->nz+b->nz; 2401 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2402 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2403 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2404 MatScalar *aworkA,*aworkB; 2405 PetscScalar *vals; 2406 PetscMPIInt tag1,tag2,tag3,imdex; 2407 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2408 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2409 MPI_Status recv_status,*send_status; 2410 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2411 PetscInt **rbuf_j=NULL; 2412 PetscScalar **rbuf_a=NULL; 2413 Mat_Redundant *redund =NULL; 2414 2415 PetscFunctionBegin; 2416 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2417 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2418 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2419 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2420 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2421 2422 if (reuse == MAT_REUSE_MATRIX) { 2423 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2424 if (subsize == 1) { 2425 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2426 redund = c->redundant; 2427 } else { 2428 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2429 redund = c->redundant; 2430 } 2431 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2432 2433 nsends = redund->nsends; 2434 nrecvs = redund->nrecvs; 2435 send_rank = redund->send_rank; 2436 recv_rank = redund->recv_rank; 2437 sbuf_nz = redund->sbuf_nz; 2438 rbuf_nz = redund->rbuf_nz; 2439 sbuf_j = redund->sbuf_j; 2440 sbuf_a = redund->sbuf_a; 2441 rbuf_j = redund->rbuf_j; 2442 rbuf_a = redund->rbuf_a; 2443 } 2444 2445 if (reuse == MAT_INITIAL_MATRIX) { 2446 PetscInt nleftover,np_subcomm; 2447 2448 /* get the destination processors' id send_rank, nsends and nrecvs */ 2449 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2450 2451 np_subcomm = size/nsubcomm; 2452 nleftover = size - nsubcomm*np_subcomm; 2453 2454 /* block of codes below is specific for INTERLACED */ 2455 /* ------------------------------------------------*/ 2456 nsends = 0; nrecvs = 0; 2457 for (i=0; i<size; i++) { 2458 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2459 send_rank[nsends++] = i; 2460 recv_rank[nrecvs++] = i; 2461 } 2462 } 2463 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2464 i = size-nleftover-1; 2465 j = 0; 2466 while (j < nsubcomm - nleftover) { 2467 send_rank[nsends++] = i; 2468 i--; j++; 2469 } 2470 } 2471 2472 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2473 for (i=0; i<nleftover; i++) { 2474 recv_rank[nrecvs++] = size-nleftover+i; 2475 } 2476 } 2477 /*----------------------------------------------*/ 2478 2479 /* allocate sbuf_j, sbuf_a */ 2480 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2481 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2482 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2483 /* 2484 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2485 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2486 */ 2487 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2488 2489 /* copy mat's local entries into the buffers */ 2490 if (reuse == MAT_INITIAL_MATRIX) { 2491 rownz_max = 0; 2492 rptr = sbuf_j; 2493 cols = sbuf_j + rend-rstart + 1; 2494 vals = sbuf_a; 2495 rptr[0] = 0; 2496 for (i=0; i<rend-rstart; i++) { 2497 row = i + rstart; 2498 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2499 ncols = nzA + nzB; 2500 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2501 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2502 /* load the column indices for this row into cols */ 2503 lwrite = 0; 2504 for (l=0; l<nzB; l++) { 2505 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2506 vals[lwrite] = aworkB[l]; 2507 cols[lwrite++] = ctmp; 2508 } 2509 } 2510 for (l=0; l<nzA; l++) { 2511 vals[lwrite] = aworkA[l]; 2512 cols[lwrite++] = cstart + cworkA[l]; 2513 } 2514 for (l=0; l<nzB; l++) { 2515 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2516 vals[lwrite] = aworkB[l]; 2517 cols[lwrite++] = ctmp; 2518 } 2519 } 2520 vals += ncols; 2521 cols += ncols; 2522 rptr[i+1] = rptr[i] + ncols; 2523 if (rownz_max < ncols) rownz_max = ncols; 2524 } 2525 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2526 } else { /* only copy matrix values into sbuf_a */ 2527 rptr = sbuf_j; 2528 vals = sbuf_a; 2529 rptr[0] = 0; 2530 for (i=0; i<rend-rstart; i++) { 2531 row = i + rstart; 2532 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2533 ncols = nzA + nzB; 2534 cworkB = b->j + b->i[i]; 2535 aworkA = a->a + a->i[i]; 2536 aworkB = b->a + b->i[i]; 2537 lwrite = 0; 2538 for (l=0; 
l<nzB; l++) { 2539 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2540 } 2541 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2542 for (l=0; l<nzB; l++) { 2543 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2544 } 2545 vals += ncols; 2546 rptr[i+1] = rptr[i] + ncols; 2547 } 2548 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2549 2550 /* send nzlocal to others, and recv other's nzlocal */ 2551 /*--------------------------------------------------*/ 2552 if (reuse == MAT_INITIAL_MATRIX) { 2553 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2554 2555 s_waits2 = s_waits3 + nsends; 2556 s_waits1 = s_waits2 + nsends; 2557 r_waits1 = s_waits1 + nsends; 2558 r_waits2 = r_waits1 + nrecvs; 2559 r_waits3 = r_waits2 + nrecvs; 2560 } else { 2561 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2562 2563 r_waits3 = s_waits3 + nsends; 2564 } 2565 2566 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2567 if (reuse == MAT_INITIAL_MATRIX) { 2568 /* get new tags to keep the communication clean */ 2569 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2570 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2571 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2572 2573 /* post receives of other's nzlocal */ 2574 for (i=0; i<nrecvs; i++) { 2575 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2576 } 2577 /* send nzlocal to others */ 2578 for (i=0; i<nsends; i++) { 2579 sbuf_nz[i] = nzlocal; 2580 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2581 } 2582 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2583 count = nrecvs; 2584 while (count) { 2585 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2586 2587 recv_rank[imdex] = recv_status.MPI_SOURCE; 2588 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2589 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2590 2591 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2592 2593 rbuf_nz[imdex] += i + 2; 2594 2595 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2596 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2597 count--; 2598 } 2599 /* wait on sends of nzlocal */ 2600 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2601 /* send mat->i,j to others, and recv from other's */ 2602 /*------------------------------------------------*/ 2603 for (i=0; i<nsends; i++) { 2604 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2605 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2606 } 2607 /* wait on receives of mat->i,j */ 2608 /*------------------------------*/ 2609 count = nrecvs; 2610 while (count) { 2611 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2612 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2613 count--; 2614 } 2615 /* wait on sends of mat->i,j */ 2616 /*---------------------------*/ 2617 if (nsends) { 2618 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2619 } 2620 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2621 2622 /* post 
receives, send and receive mat->a */ 2623 /*----------------------------------------*/ 2624 for (imdex=0; imdex<nrecvs; imdex++) { 2625 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2626 } 2627 for (i=0; i<nsends; i++) { 2628 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2629 } 2630 count = nrecvs; 2631 while (count) { 2632 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2633 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2634 count--; 2635 } 2636 if (nsends) { 2637 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2638 } 2639 2640 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2641 2642 /* create redundant matrix */ 2643 /*-------------------------*/ 2644 if (reuse == MAT_INITIAL_MATRIX) { 2645 const PetscInt *range; 2646 PetscInt rstart_sub,rend_sub,mloc_sub; 2647 2648 /* compute rownz_max for preallocation */ 2649 for (imdex=0; imdex<nrecvs; imdex++) { 2650 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2651 rptr = rbuf_j[imdex]; 2652 for (i=0; i<j; i++) { 2653 ncols = rptr[i+1] - rptr[i]; 2654 if (rownz_max < ncols) rownz_max = ncols; 2655 } 2656 } 2657 2658 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2659 2660 /* get local size of redundant matrix 2661 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2662 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2663 rstart_sub = range[nsubcomm*subrank]; 2664 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2665 rend_sub = range[nsubcomm*(subrank+1)]; 2666 } else { 2667 rend_sub = mat->rmap->N; 2668 } 2669 mloc_sub = rend_sub - rstart_sub; 2670 2671 if (M == N) { 2672 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2673 } else { /* non-square matrix */ 2674 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2675 } 2676 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2677 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2678 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2679 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2680 } else { 2681 C = *matredundant; 2682 } 2683 2684 /* insert local matrix entries */ 2685 rptr = sbuf_j; 2686 cols = sbuf_j + rend-rstart + 1; 2687 vals = sbuf_a; 2688 for (i=0; i<rend-rstart; i++) { 2689 row = i + rstart; 2690 ncols = rptr[i+1] - rptr[i]; 2691 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2692 vals += ncols; 2693 cols += ncols; 2694 } 2695 /* insert received matrix entries */ 2696 for (imdex=0; imdex<nrecvs; imdex++) { 2697 rstart = rowrange[recv_rank[imdex]]; 2698 rend = rowrange[recv_rank[imdex]+1]; 2699 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2700 rptr = rbuf_j[imdex]; 2701 cols = rbuf_j[imdex] + rend-rstart + 1; 2702 vals = rbuf_a[imdex]; 2703 for (i=0; i<rend-rstart; i++) { 2704 row = i + rstart; 2705 ncols = rptr[i+1] - rptr[i]; 2706 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2707 vals += ncols; 2708 cols += ncols; 2709 } 2710 } 2711 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2712 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2713 2714 if (reuse == MAT_INITIAL_MATRIX) { 2715 *matredundant = C; 2716 2717 /* create a 
supporting struct and attach it to C for reuse */ 2718 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2719 if (subsize == 1) { 2720 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2721 c->redundant = redund; 2722 } else { 2723 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2724 c->redundant = redund; 2725 } 2726 2727 redund->nzlocal = nzlocal; 2728 redund->nsends = nsends; 2729 redund->nrecvs = nrecvs; 2730 redund->send_rank = send_rank; 2731 redund->recv_rank = recv_rank; 2732 redund->sbuf_nz = sbuf_nz; 2733 redund->rbuf_nz = rbuf_nz; 2734 redund->sbuf_j = sbuf_j; 2735 redund->sbuf_a = sbuf_a; 2736 redund->rbuf_j = rbuf_j; 2737 redund->rbuf_a = rbuf_a; 2738 redund->psubcomm = NULL; 2739 2740 redund->Destroy = C->ops->destroy; 2741 C->ops->destroy = MatDestroy_MatRedundant; 2742 } 2743 PetscFunctionReturn(0); 2744 } 2745 2746 #undef __FUNCT__ 2747 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2748 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2749 { 2750 PetscErrorCode ierr; 2751 MPI_Comm comm; 2752 PetscMPIInt size,subsize; 2753 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2754 Mat_Redundant *redund=NULL; 2755 PetscSubcomm psubcomm=NULL; 2756 MPI_Comm subcomm_in=subcomm; 2757 Mat *matseq; 2758 IS isrow,iscol; 2759 2760 PetscFunctionBegin; 2761 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2762 if (reuse == MAT_INITIAL_MATRIX) { 2763 /* create psubcomm, then get subcomm */ 2764 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2765 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2766 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2767 2768 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2769 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2770 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2771 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2772 subcomm = psubcomm->comm; 2773 } else { /* retrieve psubcomm and subcomm */ 2774 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2775 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2776 if (subsize == 1) { 2777 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2778 redund = c->redundant; 2779 } else { 2780 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2781 redund = c->redundant; 2782 } 2783 psubcomm = redund->psubcomm; 2784 } 2785 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2786 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2787 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2788 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2789 if (subsize == 1) { 2790 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2791 c->redundant->psubcomm = psubcomm; 2792 } else { 2793 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2794 c->redundant->psubcomm = psubcomm ; 2795 } 2796 } 2797 PetscFunctionReturn(0); 2798 } 2799 } 2800 2801 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2802 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2803 if (reuse == MAT_INITIAL_MATRIX) { 2804 /* create a local sequential matrix matseq[0] */ 2805 mloc_sub = PETSC_DECIDE; 2806 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2807 ierr = 
MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2808 rstart = rend - mloc_sub; 2809 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2810 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2811 } else { /* reuse == MAT_REUSE_MATRIX */ 2812 if (subsize == 1) { 2813 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2814 redund = c->redundant; 2815 } else { 2816 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2817 redund = c->redundant; 2818 } 2819 2820 isrow = redund->isrow; 2821 iscol = redund->iscol; 2822 matseq = redund->matseq; 2823 } 2824 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2825 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2826 2827 if (reuse == MAT_INITIAL_MATRIX) { 2828 /* create a supporting struct and attach it to C for reuse */ 2829 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2830 if (subsize == 1) { 2831 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2832 c->redundant = redund; 2833 } else { 2834 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2835 c->redundant = redund; 2836 } 2837 redund->isrow = isrow; 2838 redund->iscol = iscol; 2839 redund->matseq = matseq; 2840 redund->psubcomm = psubcomm; 2841 redund->Destroy = (*matredundant)->ops->destroy; 2842 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2843 } 2844 PetscFunctionReturn(0); 2845 } 2846 2847 #undef __FUNCT__ 2848 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2849 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2850 { 2851 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2852 PetscErrorCode ierr; 2853 PetscInt i,*idxb = 0; 2854 PetscScalar *va,*vb; 2855 Vec vtmp; 2856 2857 PetscFunctionBegin; 2858 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2859 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2860 if (idx) { 2861 for (i=0; i<A->rmap->n; i++) { 2862 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2863 } 2864 } 2865 2866 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2867 if (idx) { 2868 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2869 } 2870 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2871 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2872 2873 for (i=0; i<A->rmap->n; i++) { 2874 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2875 va[i] = vb[i]; 2876 if (idx) idx[i] = a->garray[idxb[i]]; 2877 } 2878 } 2879 2880 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2881 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2882 ierr = PetscFree(idxb);CHKERRQ(ierr); 2883 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2884 PetscFunctionReturn(0); 2885 } 2886 2887 #undef __FUNCT__ 2888 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2889 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2890 { 2891 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2892 PetscErrorCode ierr; 2893 PetscInt i,*idxb = 0; 2894 PetscScalar *va,*vb; 2895 Vec vtmp; 2896 2897 PetscFunctionBegin; 2898 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2899 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2900 if (idx) { 2901 for (i=0; i<A->rmap->n; i++) { /* loop over the local rows; va[] has rmap->n entries */ 2902 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2903 } 2904 } 2905 2906 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2907 if (idx) { 2908 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2909 } 2910 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2911 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2912 2913 for (i=0; i<A->rmap->n; i++)
{ 2914 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2915 va[i] = vb[i]; 2916 if (idx) idx[i] = a->garray[idxb[i]]; 2917 } 2918 } 2919 2920 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2921 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2922 ierr = PetscFree(idxb);CHKERRQ(ierr); 2923 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2924 PetscFunctionReturn(0); 2925 } 2926 2927 #undef __FUNCT__ 2928 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2929 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2930 { 2931 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2932 PetscInt n = A->rmap->n; 2933 PetscInt cstart = A->cmap->rstart; 2934 PetscInt *cmap = mat->garray; 2935 PetscInt *diagIdx, *offdiagIdx; 2936 Vec diagV, offdiagV; 2937 PetscScalar *a, *diagA, *offdiagA; 2938 PetscInt r; 2939 PetscErrorCode ierr; 2940 2941 PetscFunctionBegin; 2942 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2943 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2944 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2945 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2946 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2947 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2948 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2949 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2950 for (r = 0; r < n; ++r) { 2951 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2952 a[r] = diagA[r]; 2953 idx[r] = cstart + diagIdx[r]; 2954 } else { 2955 a[r] = offdiagA[r]; 2956 idx[r] = cmap[offdiagIdx[r]]; 2957 } 2958 } 2959 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2960 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2961 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2962 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2963 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2964 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2965 PetscFunctionReturn(0); 2966 } 2967 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2970 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2971 { 2972 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2973 PetscInt n = A->rmap->n; 2974 PetscInt cstart = A->cmap->rstart; 2975 PetscInt *cmap = mat->garray; 2976 PetscInt *diagIdx, *offdiagIdx; 2977 Vec diagV, offdiagV; 2978 PetscScalar *a, *diagA, *offdiagA; 2979 PetscInt r; 2980 PetscErrorCode ierr; 2981 2982 PetscFunctionBegin; 2983 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2984 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2985 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2986 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2987 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2988 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2989 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2990 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2991 for (r = 0; r < n; ++r) { 2992 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2993 a[r] = diagA[r]; 2994 idx[r] = cstart + diagIdx[r]; 2995 } else { 2996 a[r] = offdiagA[r]; 2997 idx[r] = cmap[offdiagIdx[r]]; 2998 } 2999 } 3000 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3001 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3002 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3003 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3004 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3005 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3006 
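  /* At this point v[r] holds whichever of the diagonal-block and off-diagonal-block row maxima has the larger magnitude (ties go to the diagonal block), and idx[r] records the matching global column: cstart plus the local column for the diagonal block, or garray[] for the off-diagonal block. */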
PetscFunctionReturn(0); 3007 } 3008 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3011 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3012 { 3013 PetscErrorCode ierr; 3014 Mat *dummy; 3015 3016 PetscFunctionBegin; 3017 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3018 *newmat = *dummy; 3019 ierr = PetscFree(dummy);CHKERRQ(ierr); 3020 PetscFunctionReturn(0); 3021 } 3022 3023 #undef __FUNCT__ 3024 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3025 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3026 { 3027 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3028 PetscErrorCode ierr; 3029 3030 PetscFunctionBegin; 3031 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3032 PetscFunctionReturn(0); 3033 } 3034 3035 #undef __FUNCT__ 3036 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3037 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3038 { 3039 PetscErrorCode ierr; 3040 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3041 3042 PetscFunctionBegin; 3043 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3044 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3045 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3046 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3047 PetscFunctionReturn(0); 3048 } 3049 3050 /* -------------------------------------------------------------------*/ 3051 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3052 MatGetRow_MPIAIJ, 3053 MatRestoreRow_MPIAIJ, 3054 MatMult_MPIAIJ, 3055 /* 4*/ MatMultAdd_MPIAIJ, 3056 MatMultTranspose_MPIAIJ, 3057 MatMultTransposeAdd_MPIAIJ, 3058 #if defined(PETSC_HAVE_PBGL) 3059 MatSolve_MPIAIJ, 3060 #else 3061 0, 3062 #endif 3063 0, 3064 0, 3065 /*10*/ 0, 3066 0, 3067 0, 3068 MatSOR_MPIAIJ, 3069 MatTranspose_MPIAIJ, 3070 /*15*/ MatGetInfo_MPIAIJ, 3071 MatEqual_MPIAIJ, 3072 MatGetDiagonal_MPIAIJ, 3073 MatDiagonalScale_MPIAIJ, 3074 MatNorm_MPIAIJ, 3075 /*20*/ MatAssemblyBegin_MPIAIJ, 3076 MatAssemblyEnd_MPIAIJ, 3077 MatSetOption_MPIAIJ, 3078 MatZeroEntries_MPIAIJ, 3079 /*24*/ MatZeroRows_MPIAIJ, 3080 0, 3081 #if defined(PETSC_HAVE_PBGL) 3082 0, 3083 #else 3084 0, 3085 #endif 3086 0, 3087 0, 3088 /*29*/ MatSetUp_MPIAIJ, 3089 #if defined(PETSC_HAVE_PBGL) 3090 0, 3091 #else 3092 0, 3093 #endif 3094 0, 3095 0, 3096 0, 3097 /*34*/ MatDuplicate_MPIAIJ, 3098 0, 3099 0, 3100 0, 3101 0, 3102 /*39*/ MatAXPY_MPIAIJ, 3103 MatGetSubMatrices_MPIAIJ, 3104 MatIncreaseOverlap_MPIAIJ, 3105 MatGetValues_MPIAIJ, 3106 MatCopy_MPIAIJ, 3107 /*44*/ MatGetRowMax_MPIAIJ, 3108 MatScale_MPIAIJ, 3109 0, 3110 0, 3111 MatZeroRowsColumns_MPIAIJ, 3112 /*49*/ MatSetRandom_MPIAIJ, 3113 0, 3114 0, 3115 0, 3116 0, 3117 /*54*/ MatFDColoringCreate_MPIXAIJ, 3118 0, 3119 MatSetUnfactored_MPIAIJ, 3120 MatPermute_MPIAIJ, 3121 0, 3122 /*59*/ MatGetSubMatrix_MPIAIJ, 3123 MatDestroy_MPIAIJ, 3124 MatView_MPIAIJ, 3125 0, 3126 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3127 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3128 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3129 0, 3130 0, 3131 0, 3132 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3133 MatGetRowMinAbs_MPIAIJ, 3134 0, 3135 MatSetColoring_MPIAIJ, 3136 0, 3137 MatSetValuesAdifor_MPIAIJ, 3138 /*75*/ MatFDColoringApply_AIJ, 3139 0, 3140 0, 3141 0, 3142 MatFindZeroDiagonals_MPIAIJ, 3143 /*80*/ 0, 3144 0, 3145 0, 3146 /*83*/ MatLoad_MPIAIJ, 3147 0, 3148 0, 3149 0, 3150 0, 3151 0, 3152 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3153 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3154 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3155 MatPtAP_MPIAIJ_MPIAIJ, 3156 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3157 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3158 0, 3159 0, 3160 0, 3161 0, 3162 /*99*/ 0, 3163 0, 3164 0, 3165 MatConjugate_MPIAIJ, 3166 0, 3167 /*104*/MatSetValuesRow_MPIAIJ, 3168 MatRealPart_MPIAIJ, 3169 MatImaginaryPart_MPIAIJ, 3170 0, 3171 0, 3172 /*109*/0, 3173 MatGetRedundantMatrix_MPIAIJ, 3174 MatGetRowMin_MPIAIJ, 3175 0, 3176 0, 3177 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3178 0, 3179 0, 3180 0, 3181 0, 3182 /*119*/0, 3183 0, 3184 0, 3185 0, 3186 MatGetMultiProcBlock_MPIAIJ, 3187 /*124*/MatFindNonzeroRows_MPIAIJ, 3188 MatGetColumnNorms_MPIAIJ, 3189 MatInvertBlockDiagonal_MPIAIJ, 3190 0, 3191 MatGetSubMatricesParallel_MPIAIJ, 3192 /*129*/0, 3193 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3194 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3195 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3196 0, 3197 /*134*/0, 3198 0, 3199 0, 3200 0, 3201 0, 3202 /*139*/0, 3203 0, 3204 0, 3205 MatFDColoringSetUp_MPIXAIJ 3206 }; 3207 3208 /* ----------------------------------------------------------------------------------------*/ 3209 3210 #undef __FUNCT__ 3211 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3212 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3213 { 3214 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3215 PetscErrorCode ierr; 3216 3217 PetscFunctionBegin; 3218 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3219 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3220 PetscFunctionReturn(0); 3221 } 3222 3223 #undef __FUNCT__ 3224 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3225 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3226 { 3227 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3228 PetscErrorCode ierr; 3229 3230 PetscFunctionBegin; 3231 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3232 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3233 PetscFunctionReturn(0); 3234 } 3235 3236 #undef __FUNCT__ 3237 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3238 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3239 { 3240 Mat_MPIAIJ *b; 3241 PetscErrorCode ierr; 3242 3243 PetscFunctionBegin; 3244 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3245 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3246 b = (Mat_MPIAIJ*)B->data; 3247 3248 if (!B->preallocated) { 3249 /* Explicitly create 2 MATSEQAIJ matrices. 
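       b->A holds the "diagonal" block (local rows by local columns) and b->B holds the
       "off-diagonal" block. b->B is created below with the full global column count; its columns
       are compacted to just the ones actually referenced (recorded in garray) when the matrix is
       assembled.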
*/ 3250 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3251 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3252 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3253 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3254 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3255 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3256 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3257 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3258 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3259 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3260 } 3261 3262 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3263 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3264 B->preallocated = PETSC_TRUE; 3265 PetscFunctionReturn(0); 3266 } 3267 3268 #undef __FUNCT__ 3269 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3270 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3271 { 3272 Mat mat; 3273 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3274 PetscErrorCode ierr; 3275 3276 PetscFunctionBegin; 3277 *newmat = 0; 3278 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3279 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3280 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3281 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3282 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3283 a = (Mat_MPIAIJ*)mat->data; 3284 3285 mat->factortype = matin->factortype; 3286 mat->assembled = PETSC_TRUE; 3287 mat->insertmode = NOT_SET_VALUES; 3288 mat->preallocated = PETSC_TRUE; 3289 3290 a->size = oldmat->size; 3291 a->rank = oldmat->rank; 3292 a->donotstash = oldmat->donotstash; 3293 a->roworiented = oldmat->roworiented; 3294 a->rowindices = 0; 3295 a->rowvalues = 0; 3296 a->getrowactive = PETSC_FALSE; 3297 3298 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3299 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3300 3301 if (oldmat->colmap) { 3302 #if defined(PETSC_USE_CTABLE) 3303 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3304 #else 3305 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3306 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3307 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3308 #endif 3309 } else a->colmap = 0; 3310 if (oldmat->garray) { 3311 PetscInt len; 3312 len = oldmat->B->cmap->n; 3313 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3314 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3315 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3316 } else a->garray = 0; 3317 3318 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3319 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3320 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3321 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3322 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3323 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3324 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3325 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3326 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3327 *newmat = mat; 3328 PetscFunctionReturn(0); 3329 } 3330 3331 3332 3333 #undef __FUNCT__ 3334 #define __FUNCT__ "MatLoad_MPIAIJ" 3335 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3336 { 3337 PetscScalar *vals,*svals; 3338 MPI_Comm comm; 3339 PetscErrorCode ierr; 3340 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3341 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3342 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3343 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3344 PetscInt cend,cstart,n,*rowners,sizesset=1; 3345 int fd; 3346 PetscInt bs = 1; 3347 3348 PetscFunctionBegin; 3349 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3350 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3351 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3352 if (!rank) { 3353 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3354 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3355 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3356 } 3357 3358 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3359 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3360 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3361 3362 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3363 3364 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3365 M = header[1]; N = header[2]; 3366 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3367 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3368 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3369 3370 /* If global sizes are set, check if they are consistent with that given in the file */ 3371 if (sizesset) { 3372 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3373 } 3374 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3375 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3376 3377 /* determine ownership of all (block) rows */ 3378 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3379 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3380 else m = newMat->rmap->n; /* Set by user */ 3381 3382 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3383 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3384 3385 /* First process needs enough room for process with most rows */ 3386 if (!rank) { 3387 mmax = rowners[1]; 3388 for (i=2; i<=size; i++) { 3389 mmax = PetscMax(mmax, rowners[i]); 3390 } 3391 } else mmax = -1; /* unused, but compilers complain */ 3392 3393 rowners[0] = 0; 3394 for (i=2; i<=size; i++) { 3395 rowners[i] += rowners[i-1]; 3396 } 3397 rstart = rowners[rank]; 3398 rend = rowners[rank+1]; 3399 3400 /* distribute row lengths to all processors */ 3401 ierr = 
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3402 if (!rank) { 3403 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3404 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3405 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3406 for (j=0; j<m; j++) { 3407 procsnz[0] += ourlens[j]; 3408 } 3409 for (i=1; i<size; i++) { 3410 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3411 /* calculate the number of nonzeros on each processor */ 3412 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3413 procsnz[i] += rowlengths[j]; 3414 } 3415 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3416 } 3417 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3418 } else { 3419 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3420 } 3421 3422 if (!rank) { 3423 /* determine max buffer needed and allocate it */ 3424 maxnz = 0; 3425 for (i=0; i<size; i++) { 3426 maxnz = PetscMax(maxnz,procsnz[i]); 3427 } 3428 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3429 3430 /* read in my part of the matrix column indices */ 3431 nz = procsnz[0]; 3432 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3433 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3434 3435 /* read in every one elses and ship off */ 3436 for (i=1; i<size; i++) { 3437 nz = procsnz[i]; 3438 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3439 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3440 } 3441 ierr = PetscFree(cols);CHKERRQ(ierr); 3442 } else { 3443 /* determine buffer space needed for message */ 3444 nz = 0; 3445 for (i=0; i<m; i++) { 3446 nz += ourlens[i]; 3447 } 3448 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3449 3450 /* receive message of column indices*/ 3451 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3452 } 3453 3454 /* determine column ownership if matrix is not square */ 3455 if (N != M) { 3456 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3457 else n = newMat->cmap->n; 3458 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3459 cstart = cend - n; 3460 } else { 3461 cstart = rstart; 3462 cend = rend; 3463 n = cend - cstart; 3464 } 3465 3466 /* loop over local rows, determining number of off diagonal entries */ 3467 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3468 jj = 0; 3469 for (i=0; i<m; i++) { 3470 for (j=0; j<ourlens[i]; j++) { 3471 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3472 jj++; 3473 } 3474 } 3475 3476 for (i=0; i<m; i++) { 3477 ourlens[i] -= offlens[i]; 3478 } 3479 if (!sizesset) { 3480 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3481 } 3482 3483 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3484 3485 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3486 3487 for (i=0; i<m; i++) { 3488 ourlens[i] += offlens[i]; 3489 } 3490 3491 if (!rank) { 3492 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3493 3494 /* read in my part of the matrix numerical values */ 3495 nz = procsnz[0]; 3496 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3497 3498 /* insert into matrix */ 3499 jj = rstart; 3500 smycols = mycols; 3501 svals = vals; 3502 for (i=0; i<m; i++) { 3503 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3504 smycols += ourlens[i]; 3505 svals += ourlens[i]; 3506 jj++; 3507 } 3508 3509 /* read in other processors and ship out */ 3510 for (i=1; i<size; i++) { 3511 nz = 
procsnz[i]; 3512 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3513 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3514 } 3515 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3516 } else { 3517 /* receive numeric values */ 3518 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3519 3520 /* receive message of values*/ 3521 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3522 3523 /* insert into matrix */ 3524 jj = rstart; 3525 smycols = mycols; 3526 svals = vals; 3527 for (i=0; i<m; i++) { 3528 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3529 smycols += ourlens[i]; 3530 svals += ourlens[i]; 3531 jj++; 3532 } 3533 } 3534 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3535 ierr = PetscFree(vals);CHKERRQ(ierr); 3536 ierr = PetscFree(mycols);CHKERRQ(ierr); 3537 ierr = PetscFree(rowners);CHKERRQ(ierr); 3538 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3539 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3540 PetscFunctionReturn(0); 3541 } 3542 3543 #undef __FUNCT__ 3544 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3545 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3546 { 3547 PetscErrorCode ierr; 3548 IS iscol_local; 3549 PetscInt csize; 3550 3551 PetscFunctionBegin; 3552 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3553 if (call == MAT_REUSE_MATRIX) { 3554 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3555 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3556 } else { 3557 PetscInt cbs; 3558 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3559 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3560 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3561 } 3562 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3563 if (call == MAT_INITIAL_MATRIX) { 3564 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3565 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3566 } 3567 PetscFunctionReturn(0); 3568 } 3569 3570 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3571 #undef __FUNCT__ 3572 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3573 /* 3574 Not great since it makes two copies of the submatrix, first an SeqAIJ 3575 in local and then by concatenating the local matrices the end result. 3576 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3577 3578 Note: This requires a sequential iscol with all indices. 
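       (MatGetSubMatrix_MPIAIJ() above builds that iscol with ISAllGather() and caches it on the
       new matrix under the name "ISAllGather", so MAT_REUSE_MATRIX calls can retrieve it with
       PetscObjectQuery().)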
3579 */ 3580 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3581 { 3582 PetscErrorCode ierr; 3583 PetscMPIInt rank,size; 3584 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3585 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3586 PetscBool allcolumns, colflag; 3587 Mat M,Mreuse; 3588 MatScalar *vwork,*aa; 3589 MPI_Comm comm; 3590 Mat_SeqAIJ *aij; 3591 3592 PetscFunctionBegin; 3593 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3594 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3595 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3596 3597 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3598 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3599 if (colflag && ncol == mat->cmap->N) { 3600 allcolumns = PETSC_TRUE; 3601 } else { 3602 allcolumns = PETSC_FALSE; 3603 } 3604 if (call == MAT_REUSE_MATRIX) { 3605 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3606 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3607 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3608 } else { 3609 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3610 } 3611 3612 /* 3613 m - number of local rows 3614 n - number of columns (same on all processors) 3615 rstart - first row in new global matrix generated 3616 */ 3617 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3618 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3619 if (call == MAT_INITIAL_MATRIX) { 3620 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3621 ii = aij->i; 3622 jj = aij->j; 3623 3624 /* 3625 Determine the number of non-zeros in the diagonal and off-diagonal 3626 portions of the matrix in order to do correct preallocation 3627 */ 3628 3629 /* first get start and end of "diagonal" columns */ 3630 if (csize == PETSC_DECIDE) { 3631 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3632 if (mglobal == n) { /* square matrix */ 3633 nlocal = m; 3634 } else { 3635 nlocal = n/size + ((n % size) > rank); 3636 } 3637 } else { 3638 nlocal = csize; 3639 } 3640 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3641 rstart = rend - nlocal; 3642 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3643 3644 /* next, compute all the lengths */ 3645 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3646 olens = dlens + m; 3647 for (i=0; i<m; i++) { 3648 jend = ii[i+1] - ii[i]; 3649 olen = 0; 3650 dlen = 0; 3651 for (j=0; j<jend; j++) { 3652 if (*jj < rstart || *jj >= rend) olen++; 3653 else dlen++; 3654 jj++; 3655 } 3656 olens[i] = olen; 3657 dlens[i] = dlen; 3658 } 3659 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3660 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3661 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3662 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3663 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3664 ierr = PetscFree(dlens);CHKERRQ(ierr); 3665 } else { 3666 PetscInt ml,nl; 3667 3668 M = *newmat; 3669 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3670 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3671 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3672 /* 3673 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3674 rather than the slower MatSetValues(). 3675 */ 3676 M->was_assembled = PETSC_TRUE; 3677 M->assembled = PETSC_FALSE; 3678 } 3679 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3680 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3681 ii = aij->i; 3682 jj = aij->j; 3683 aa = aij->a; 3684 for (i=0; i<m; i++) { 3685 row = rstart + i; 3686 nz = ii[i+1] - ii[i]; 3687 cwork = jj; jj += nz; 3688 vwork = aa; aa += nz; 3689 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3690 } 3691 3692 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3693 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3694 *newmat = M; 3695 3696 /* save submatrix used in processor for next request */ 3697 if (call == MAT_INITIAL_MATRIX) { 3698 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3699 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3700 } 3701 PetscFunctionReturn(0); 3702 } 3703 3704 #undef __FUNCT__ 3705 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3706 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3707 { 3708 PetscInt m,cstart, cend,j,nnz,i,d; 3709 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3710 const PetscInt *JJ; 3711 PetscScalar *values; 3712 PetscErrorCode ierr; 3713 3714 PetscFunctionBegin; 3715 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3716 3717 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3718 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3719 m = B->rmap->n; 3720 cstart = B->cmap->rstart; 3721 cend = B->cmap->rend; 3722 rstart = B->rmap->rstart; 3723 3724 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3725 3726 #if defined(PETSC_USE_DEBUG) 3727 for (i=0; i<m; i++) { 3728 nnz = Ii[i+1]- Ii[i]; 3729 JJ = J + Ii[i]; 3730 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3731 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3732 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3733 } 3734 #endif 3735 3736 for (i=0; i<m; i++) { 3737 nnz = Ii[i+1]- Ii[i]; 3738 JJ = J + Ii[i]; 3739 nnz_max = PetscMax(nnz_max,nnz); 3740 d = 0; 3741 for (j=0; j<nnz; j++) { 3742 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3743 } 3744 d_nnz[i] = d; 3745 o_nnz[i] = nnz - d; 3746 } 3747 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3748 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3749 3750 if (v) values = (PetscScalar*)v; 3751 else { 3752 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3753 } 3754 3755 for (i=0; i<m; i++) { 3756 ii = i + rstart; 3757 nnz = Ii[i+1]- Ii[i]; 3758 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3759 } 3760 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3761 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3762 3763 if (!v) { 3764 ierr = PetscFree(values);CHKERRQ(ierr); 3765 } 3766 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3767 PetscFunctionReturn(0); 3768 } 3769 3770 #undef __FUNCT__ 3771 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3772 /*@ 3773 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3774 (the default parallel PETSc format). 3775 3776 Collective on MPI_Comm 3777 3778 Input Parameters: 3779 + B - the matrix 3780 . i - the indices into j for the start of each local row (starts with zero) 3781 . j - the column indices for each local row (starts with zero) 3782 - v - optional values in the matrix 3783 3784 Level: developer 3785 3786 Notes: 3787 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3788 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3789 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3790 3791 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3792 3793 The format which is used for the sparse matrix input, is equivalent to a 3794 row-major ordering.. i.e for the following matrix, the input data expected is 3795 as shown: 3796 3797 1 0 0 3798 2 0 3 P0 3799 ------- 3800 4 5 6 P1 3801 3802 Process0 [P0]: rows_owned=[0,1] 3803 i = {0,1,3} [size = nrow+1 = 2+1] 3804 j = {0,0,2} [size = nz = 6] 3805 v = {1,2,3} [size = nz = 6] 3806 3807 Process1 [P1]: rows_owned=[2] 3808 i = {0,3} [size = nrow+1 = 1+1] 3809 j = {0,1,2} [size = nz = 6] 3810 v = {4,5,6} [size = nz = 6] 3811 3812 .keywords: matrix, aij, compressed row, sparse, parallel 3813 3814 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3815 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3816 @*/ 3817 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3818 { 3819 PetscErrorCode ierr; 3820 3821 PetscFunctionBegin; 3822 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3823 PetscFunctionReturn(0); 3824 } 3825 3826 #undef __FUNCT__ 3827 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3828 /*@C 3829 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3830 (the default parallel PETSc format). For good matrix assembly performance 3831 the user should preallocate the matrix storage by setting the parameters 3832 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3833 performance can be increased by more than a factor of 50. 3834 3835 Collective on MPI_Comm 3836 3837 Input Parameters: 3838 + A - the matrix 3839 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3840 (same value is used for all local rows) 3841 . d_nnz - array containing the number of nonzeros in the various rows of the 3842 DIAGONAL portion of the local submatrix (possibly different for each row) 3843 or NULL, if d_nz is used to specify the nonzero structure. 3844 The size of this array is equal to the number of local rows, i.e 'm'. 
3845 For matrices that will be factored, you must leave room for (and set) 3846 the diagonal entry even if it is zero. 3847 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3848 submatrix (same value is used for all local rows). 3849 - o_nnz - array containing the number of nonzeros in the various rows of the 3850 OFF-DIAGONAL portion of the local submatrix (possibly different for 3851 each row) or NULL, if o_nz is used to specify the nonzero 3852 structure. The size of this array is equal to the number 3853 of local rows, i.e 'm'. 3854 3855 If the *_nnz parameter is given then the *_nz parameter is ignored 3856 3857 The AIJ format (also called the Yale sparse matrix format or 3858 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3859 storage. The stored row and column indices begin with zero. 3860 See Users-Manual: ch_mat for details. 3861 3862 The parallel matrix is partitioned such that the first m0 rows belong to 3863 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3864 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3865 3866 The DIAGONAL portion of the local submatrix of a processor can be defined 3867 as the submatrix which is obtained by extraction the part corresponding to 3868 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3869 first row that belongs to the processor, r2 is the last row belonging to 3870 the this processor, and c1-c2 is range of indices of the local part of a 3871 vector suitable for applying the matrix to. This is an mxn matrix. In the 3872 common case of a square matrix, the row and column ranges are the same and 3873 the DIAGONAL part is also square. The remaining portion of the local 3874 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3875 3876 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3877 3878 You can call MatGetInfo() to get information on how effective the preallocation was; 3879 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3880 You can also run with the option -info and look for messages with the string 3881 malloc in them to see if additional memory allocation was needed. 3882 3883 Example usage: 3884 3885 Consider the following 8x8 matrix with 34 non-zero values, that is 3886 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3887 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3888 as follows: 3889 3890 .vb 3891 1 2 0 | 0 3 0 | 0 4 3892 Proc0 0 5 6 | 7 0 0 | 8 0 3893 9 0 10 | 11 0 0 | 12 0 3894 ------------------------------------- 3895 13 0 14 | 15 16 17 | 0 0 3896 Proc1 0 18 0 | 19 20 21 | 0 0 3897 0 0 0 | 22 23 0 | 24 0 3898 ------------------------------------- 3899 Proc2 25 26 27 | 0 0 28 | 29 0 3900 30 0 0 | 31 32 33 | 0 34 3901 .ve 3902 3903 This can be represented as a collection of submatrices as: 3904 3905 .vb 3906 A B C 3907 D E F 3908 G H I 3909 .ve 3910 3911 Where the submatrices A,B,C are owned by proc0, D,E,F are 3912 owned by proc1, G,H,I are owned by proc2. 3913 3914 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3915 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3916 The 'M','N' parameters are 8,8, and have the same values on all procs. 3917 3918 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3919 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3920 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3921 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3922 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3923 matrix, ans [DF] as another SeqAIJ matrix. 3924 3925 When d_nz, o_nz parameters are specified, d_nz storage elements are 3926 allocated for every row of the local diagonal submatrix, and o_nz 3927 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3928 One way to choose d_nz and o_nz is to use the max nonzerors per local 3929 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3930 In this case, the values of d_nz,o_nz are: 3931 .vb 3932 proc0 : dnz = 2, o_nz = 2 3933 proc1 : dnz = 3, o_nz = 2 3934 proc2 : dnz = 1, o_nz = 4 3935 .ve 3936 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3937 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3938 for proc3. i.e we are using 12+15+10=37 storage locations to store 3939 34 values. 3940 3941 When d_nnz, o_nnz parameters are specified, the storage is specified 3942 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3943 In the above case the values for d_nnz,o_nnz are: 3944 .vb 3945 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3946 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3947 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3948 .ve 3949 Here the space allocated is sum of all the above values i.e 34, and 3950 hence pre-allocation is perfect. 3951 3952 Level: intermediate 3953 3954 .keywords: matrix, aij, compressed row, sparse, parallel 3955 3956 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3957 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3958 @*/ 3959 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3960 { 3961 PetscErrorCode ierr; 3962 3963 PetscFunctionBegin; 3964 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3965 PetscValidType(B,1); 3966 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3967 PetscFunctionReturn(0); 3968 } 3969 3970 #undef __FUNCT__ 3971 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3972 /*@ 3973 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3974 CSR format the local rows. 3975 3976 Collective on MPI_Comm 3977 3978 Input Parameters: 3979 + comm - MPI communicator 3980 . m - number of local rows (Cannot be PETSC_DECIDE) 3981 . n - This value should be the same as the local size used in creating the 3982 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3983 calculated if N is given) For square matrices n is almost always m. 3984 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3985 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3986 . i - row indices 3987 . j - column indices 3988 - a - matrix values 3989 3990 Output Parameter: 3991 . mat - the matrix 3992 3993 Level: intermediate 3994 3995 Notes: 3996 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3997 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3998 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3999 4000 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
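
   A minimal calling sketch (hypothetical local arrays i, j, a and local sizes m, n, laid out per
   process as in the example below):

.vb
   Mat A;
   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
   /* i, j, and a were copied, so the caller may now modify or free them */
.ve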
4001 4002 The format which is used for the sparse matrix input, is equivalent to a 4003 row-major ordering.. i.e for the following matrix, the input data expected is 4004 as shown: 4005 4006 1 0 0 4007 2 0 3 P0 4008 ------- 4009 4 5 6 P1 4010 4011 Process0 [P0]: rows_owned=[0,1] 4012 i = {0,1,3} [size = nrow+1 = 2+1] 4013 j = {0,0,2} [size = nz = 6] 4014 v = {1,2,3} [size = nz = 6] 4015 4016 Process1 [P1]: rows_owned=[2] 4017 i = {0,3} [size = nrow+1 = 1+1] 4018 j = {0,1,2} [size = nz = 6] 4019 v = {4,5,6} [size = nz = 6] 4020 4021 .keywords: matrix, aij, compressed row, sparse, parallel 4022 4023 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4024 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4025 @*/ 4026 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4027 { 4028 PetscErrorCode ierr; 4029 4030 PetscFunctionBegin; 4031 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4032 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4033 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4034 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4035 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4036 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4037 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4038 PetscFunctionReturn(0); 4039 } 4040 4041 #undef __FUNCT__ 4042 #define __FUNCT__ "MatCreateAIJ" 4043 /*@C 4044 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4045 (the default parallel PETSc format). For good matrix assembly performance 4046 the user should preallocate the matrix storage by setting the parameters 4047 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4048 performance can be increased by more than a factor of 50. 4049 4050 Collective on MPI_Comm 4051 4052 Input Parameters: 4053 + comm - MPI communicator 4054 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4055 This value should be the same as the local size used in creating the 4056 y vector for the matrix-vector product y = Ax. 4057 . n - This value should be the same as the local size used in creating the 4058 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4059 calculated if N is given) For square matrices n is almost always m. 4060 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4061 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4062 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4063 (same value is used for all local rows) 4064 . d_nnz - array containing the number of nonzeros in the various rows of the 4065 DIAGONAL portion of the local submatrix (possibly different for each row) 4066 or NULL, if d_nz is used to specify the nonzero structure. 4067 The size of this array is equal to the number of local rows, i.e 'm'. 4068 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4069 submatrix (same value is used for all local rows). 4070 - o_nnz - array containing the number of nonzeros in the various rows of the 4071 OFF-DIAGONAL portion of the local submatrix (possibly different for 4072 each row) or NULL, if o_nz is used to specify the nonzero 4073 structure. 
The size of this array is equal to the number 4074 of local rows, i.e 'm'. 4075 4076 Output Parameter: 4077 . A - the matrix 4078 4079 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4080 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4081 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4082 4083 Notes: 4084 If the *_nnz parameter is given then the *_nz parameter is ignored 4085 4086 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4087 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4088 storage requirements for this matrix. 4089 4090 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4091 processor than it must be used on all processors that share the object for 4092 that argument. 4093 4094 The user MUST specify either the local or global matrix dimensions 4095 (possibly both). 4096 4097 The parallel matrix is partitioned across processors such that the 4098 first m0 rows belong to process 0, the next m1 rows belong to 4099 process 1, the next m2 rows belong to process 2 etc.. where 4100 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4101 values corresponding to [m x N] submatrix. 4102 4103 The columns are logically partitioned with the n0 columns belonging 4104 to 0th partition, the next n1 columns belonging to the next 4105 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4106 4107 The DIAGONAL portion of the local submatrix on any given processor 4108 is the submatrix corresponding to the rows and columns m,n 4109 corresponding to the given processor. i.e diagonal matrix on 4110 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4111 etc. The remaining portion of the local submatrix [m x (N-n)] 4112 constitute the OFF-DIAGONAL portion. The example below better 4113 illustrates this concept. 4114 4115 For a square global matrix we define each processor's diagonal portion 4116 to be its local rows and the corresponding columns (a square submatrix); 4117 each processor's off-diagonal portion encompasses the remainder of the 4118 local matrix (a rectangular submatrix). 4119 4120 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4121 4122 When calling this routine with a single process communicator, a matrix of 4123 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4124 type of communicator, use the construction mechanism: 4125 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4126 4127 By default, this format uses inodes (identical nodes) when possible. 4128 We search for consecutive rows with the same nonzero structure, thereby 4129 reusing matrix information to achieve increased efficiency. 4130 4131 Options Database Keys: 4132 + -mat_no_inode - Do not use inodes 4133 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4134 - -mat_aij_oneindex - Internally use indexing starting at 1 4135 rather than 0. Note that when calling MatSetValues(), 4136 the user still MUST index entries starting at 0! 4137 4138 4139 Example usage: 4140 4141 Consider the following 8x8 matrix with 34 non-zero values, that is 4142 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4143 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4144 as follows: 4145 4146 .vb 4147 1 2 0 | 0 3 0 | 0 4 4148 Proc0 0 5 6 | 7 0 0 | 8 0 4149 9 0 10 | 11 0 0 | 12 0 4150 ------------------------------------- 4151 13 0 14 | 15 16 17 | 0 0 4152 Proc1 0 18 0 | 19 20 21 | 0 0 4153 0 0 0 | 22 23 0 | 24 0 4154 ------------------------------------- 4155 Proc2 25 26 27 | 0 0 28 | 29 0 4156 30 0 0 | 31 32 33 | 0 34 4157 .ve 4158 4159 This can be represented as a collection of submatrices as: 4160 4161 .vb 4162 A B C 4163 D E F 4164 G H I 4165 .ve 4166 4167 Where the submatrices A,B,C are owned by proc0, D,E,F are 4168 owned by proc1, G,H,I are owned by proc2. 4169 4170 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4171 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4172 The 'M','N' parameters are 8,8, and have the same values on all procs. 4173 4174 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4175 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4176 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4177 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4178 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4179 matrix, ans [DF] as another SeqAIJ matrix. 4180 4181 When d_nz, o_nz parameters are specified, d_nz storage elements are 4182 allocated for every row of the local diagonal submatrix, and o_nz 4183 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4184 One way to choose d_nz and o_nz is to use the max nonzerors per local 4185 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4186 In this case, the values of d_nz,o_nz are: 4187 .vb 4188 proc0 : dnz = 2, o_nz = 2 4189 proc1 : dnz = 3, o_nz = 2 4190 proc2 : dnz = 1, o_nz = 4 4191 .ve 4192 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4193 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4194 for proc3. i.e we are using 12+15+10=37 storage locations to store 4195 34 values. 4196 4197 When d_nnz, o_nnz parameters are specified, the storage is specified 4198 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4199 In the above case the values for d_nnz,o_nnz are: 4200 .vb 4201 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4202 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4203 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4204 .ve 4205 Here the space allocated is sum of all the above values i.e 34, and 4206 hence pre-allocation is perfect. 
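
   For the layout above, the corresponding call on proc0 (each process passes its own local sizes
   and nnz arrays from the tables above) would be:

.vb
   PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
   MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve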
4207 4208 Level: intermediate 4209 4210 .keywords: matrix, aij, compressed row, sparse, parallel 4211 4212 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4213 MPIAIJ, MatCreateMPIAIJWithArrays() 4214 @*/ 4215 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4216 { 4217 PetscErrorCode ierr; 4218 PetscMPIInt size; 4219 4220 PetscFunctionBegin; 4221 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4222 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4223 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4224 if (size > 1) { 4225 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4226 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4227 } else { 4228 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4229 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4230 } 4231 PetscFunctionReturn(0); 4232 } 4233 4234 #undef __FUNCT__ 4235 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4236 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4237 { 4238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4239 4240 PetscFunctionBegin; 4241 *Ad = a->A; 4242 *Ao = a->B; 4243 *colmap = a->garray; 4244 PetscFunctionReturn(0); 4245 } 4246 4247 #undef __FUNCT__ 4248 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4249 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4250 { 4251 PetscErrorCode ierr; 4252 PetscInt i; 4253 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4254 4255 PetscFunctionBegin; 4256 if (coloring->ctype == IS_COLORING_GLOBAL) { 4257 ISColoringValue *allcolors,*colors; 4258 ISColoring ocoloring; 4259 4260 /* set coloring for diagonal portion */ 4261 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4262 4263 /* set coloring for off-diagonal portion */ 4264 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4265 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4266 for (i=0; i<a->B->cmap->n; i++) { 4267 colors[i] = allcolors[a->garray[i]]; 4268 } 4269 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4270 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4271 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4272 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4273 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4274 ISColoringValue *colors; 4275 PetscInt *larray; 4276 ISColoring ocoloring; 4277 4278 /* set coloring for diagonal portion */ 4279 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4280 for (i=0; i<a->A->cmap->n; i++) { 4281 larray[i] = i + A->cmap->rstart; 4282 } 4283 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4284 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4285 for (i=0; i<a->A->cmap->n; i++) { 4286 colors[i] = coloring->colors[larray[i]]; 4287 } 4288 ierr = PetscFree(larray);CHKERRQ(ierr); 4289 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4290 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4291 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4292 4293 /* set coloring for off-diagonal portion */ 4294 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4295 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4296 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4297 for (i=0; i<a->B->cmap->n; i++) { 4298 colors[i] = coloring->colors[larray[i]]; 4299 } 4300 ierr = PetscFree(larray);CHKERRQ(ierr); 4301 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4302 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4303 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4304 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4305 PetscFunctionReturn(0); 4306 } 4307 4308 #undef __FUNCT__ 4309 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4310 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4311 { 4312 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4313 PetscErrorCode ierr; 4314 4315 PetscFunctionBegin; 4316 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4317 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4318 PetscFunctionReturn(0); 4319 } 4320 4321 #undef __FUNCT__ 4322 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4323 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4324 { 4325 PetscErrorCode ierr; 4326 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4327 PetscInt *indx; 4328 4329 PetscFunctionBegin; 4330 /* This routine will ONLY return MPIAIJ type matrix */ 4331 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4332 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4333 if (n == PETSC_DECIDE) { 4334 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4335 } 4336 /* Check sum(n) = N */ 4337 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4338 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4339 4340 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4341 rstart -= m; 4342 4343 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4344 for (i=0; i<m; i++) { 4345 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4346 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4347 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4348 } 4349 4350 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4351 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4352 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4353 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4354 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4355 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4356 PetscFunctionReturn(0); 4357 } 4358 4359 #undef __FUNCT__ 4360 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4361 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4362 { 4363 PetscErrorCode ierr; 4364 PetscInt m,N,i,rstart,nnz,Ii; 4365 PetscInt *indx; 4366 PetscScalar *values; 4367 4368 PetscFunctionBegin; 4369 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4370 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4371 for (i=0; i<m; i++) { 4372 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4373 Ii = i + rstart; 4374 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4375 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4376 } 4377 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4378 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4379 PetscFunctionReturn(0); 4380 } 4381 4382 #undef __FUNCT__ 4383 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4384 /*@ 4385 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4386 matrices from each processor 4387 4388 Collective on MPI_Comm 4389 4390 Input Parameters: 4391 + comm - the communicators the parallel matrix will live on 4392 . inmat - the input sequential matrices 4393 . n - number of local columns (or PETSC_DECIDE) 4394 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4395 4396 Output Parameter: 4397 . outmat - the parallel matrix generated 4398 4399 Level: advanced 4400 4401 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4402 4403 @*/ 4404 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4405 { 4406 PetscErrorCode ierr; 4407 PetscMPIInt size; 4408 4409 PetscFunctionBegin; 4410 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4411 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4412 if (size == 1) { 4413 if (scall == MAT_INITIAL_MATRIX) { 4414 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4415 } else { 4416 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4417 } 4418 } else { 4419 if (scall == MAT_INITIAL_MATRIX) { 4420 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4421 } 4422 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4423 } 4424 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4425 PetscFunctionReturn(0); 4426 } 4427 4428 #undef __FUNCT__ 4429 #define __FUNCT__ "MatFileSplit" 4430 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4431 { 4432 PetscErrorCode ierr; 4433 PetscMPIInt rank; 4434 PetscInt m,N,i,rstart,nnz; 4435 size_t len; 4436 const PetscInt *indx; 4437 PetscViewer out; 4438 char *name; 4439 Mat B; 4440 const PetscScalar *values; 4441 4442 PetscFunctionBegin; 4443 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4444 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4445 /* Should this be the type of the diagonal block of A? 
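       (MatFileSplit() copies this process's rows of A into the sequential matrix B created below
       and then writes B with a binary viewer to the file <outfile>.<rank>.)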
*/ 4446 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4447 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4448 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4449 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4450 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4451 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4452 for (i=0; i<m; i++) { 4453 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4454 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4455 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4456 } 4457 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4458 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4459 4460 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4461 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4462 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4463 sprintf(name,"%s.%d",outfile,rank); 4464 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4465 ierr = PetscFree(name);CHKERRQ(ierr); 4466 ierr = MatView(B,out);CHKERRQ(ierr); 4467 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4468 ierr = MatDestroy(&B);CHKERRQ(ierr); 4469 PetscFunctionReturn(0); 4470 } 4471 4472 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4473 #undef __FUNCT__ 4474 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4475 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4476 { 4477 PetscErrorCode ierr; 4478 Mat_Merge_SeqsToMPI *merge; 4479 PetscContainer container; 4480 4481 PetscFunctionBegin; 4482 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4483 if (container) { 4484 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4487 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4497 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4498 ierr = PetscFree(merge);CHKERRQ(ierr); 4499 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4500 } 4501 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4502 PetscFunctionReturn(0); 4503 } 4504 4505 #include <../src/mat/utils/freespace.h> 4506 #include <petscbt.h> 4507 4508 #undef __FUNCT__ 4509 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4510 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4511 { 4512 PetscErrorCode ierr; 4513 MPI_Comm comm; 4514 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4515 PetscMPIInt size,rank,taga,*len_s; 4516 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4517 PetscInt proc,m; 4518 PetscInt **buf_ri,**buf_rj; 4519 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4520 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4521 MPI_Request *s_waits,*r_waits; 4522 MPI_Status *status; 4523 MatScalar *aa=a->a; 4524 MatScalar **abuf_r,*ba_i; 4525 Mat_Merge_SeqsToMPI *merge; 4526 PetscContainer container; 4527 4528 PetscFunctionBegin; 4529 ierr = 
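/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): retrieve the merge information (row layout, i/j structure, message lengths) that MatCreateMPIAIJSumSeqAIJSymbolic() stored in the "MatMergeSeqsToMPI" container, exchange the numerical values of each process's seqmat, and accumulate the sums into mpimat */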
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4530 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4531 4532 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4533 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4534 4535 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4536 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4537 4538 bi = merge->bi; 4539 bj = merge->bj; 4540 buf_ri = merge->buf_ri; 4541 buf_rj = merge->buf_rj; 4542 4543 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4544 owners = merge->rowmap->range; 4545 len_s = merge->len_s; 4546 4547 /* send and recv matrix values */ 4548 /*-----------------------------*/ 4549 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4550 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4551 4552 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4553 for (proc=0,k=0; proc<size; proc++) { 4554 if (!len_s[proc]) continue; 4555 i = owners[proc]; 4556 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4557 k++; 4558 } 4559 4560 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4561 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4562 ierr = PetscFree(status);CHKERRQ(ierr); 4563 4564 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4565 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4566 4567 /* insert mat values of mpimat */ 4568 /*----------------------------*/ 4569 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4570 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4571 4572 for (k=0; k<merge->nrecv; k++) { 4573 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4574 nrows = *(buf_ri_k[k]); 4575 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4576 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4577 } 4578 4579 /* set values of ba */ 4580 m = merge->rowmap->n; 4581 for (i=0; i<m; i++) { 4582 arow = owners[rank] + i; 4583 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4584 bnzi = bi[i+1] - bi[i]; 4585 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4586 4587 /* add local non-zero vals of this proc's seqmat into ba */ 4588 anzi = ai[arow+1] - ai[arow]; 4589 aj = a->j + ai[arow]; 4590 aa = a->a + ai[arow]; 4591 nextaj = 0; 4592 for (j=0; nextaj<anzi; j++) { 4593 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4594 ba_i[j] += aa[nextaj++]; 4595 } 4596 } 4597 4598 /* add received vals into ba */ 4599 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4600 /* i-th row */ 4601 if (i == *nextrow[k]) { 4602 anzi = *(nextai[k]+1) - *nextai[k]; 4603 aj = buf_rj[k] + *(nextai[k]); 4604 aa = abuf_r[k] + *(nextai[k]); 4605 nextaj = 0; 4606 for (j=0; nextaj<anzi; j++) { 4607 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4608 ba_i[j] += aa[nextaj++]; 4609 } 4610 } 4611 nextrow[k]++; nextai[k]++; 4612 } 4613 } 4614 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4615 } 4616 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4617 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4618 4619 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4620 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4621 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4622 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4623 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4624 PetscFunctionReturn(0); 4625 } 4626 4627 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4628 4629 #undef __FUNCT__ 4630 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4631 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4632 { 4633 PetscErrorCode ierr; 4634 Mat B_mpi; 4635 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4636 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4637 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4638 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4639 PetscInt len,proc,*dnz,*onz,bs,cbs; 4640 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4641 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4642 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4643 MPI_Status *status; 4644 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4645 PetscBT lnkbt; 4646 Mat_Merge_SeqsToMPI *merge; 4647 PetscContainer container; 4648 4649 PetscFunctionBegin; 4650 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4651 4652 /* make sure it is a PETSc comm */ 4653 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4654 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4655 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4656 4657 ierr = PetscNew(&merge);CHKERRQ(ierr); 4658 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4659 4660 /* determine row ownership */ 4661 /*---------------------------------------------------------*/ 4662 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4663 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4664 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4665 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4666 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4667 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4668 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4669 4670 m = merge->rowmap->n; 4671 owners = merge->rowmap->range; 4672 4673 /* determine the number of messages to send, their lengths */ 4674 /*---------------------------------------------------------*/ 4675 len_s = merge->len_s; 4676 4677 len = 0; /* length of buf_si[] */ 4678 merge->nsend = 0; 4679 for (proc=0; proc<size; proc++) { 4680 len_si[proc] = 0; 4681 if (proc == rank) { 4682 len_s[proc] = 0; 4683 } else { 4684 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4685 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4686 } 4687 if (len_s[proc]) { 4688 merge->nsend++; 4689 nrows = 0; 4690 for (i=owners[proc]; i<owners[proc+1]; i++) { 4691 if (ai[i+1] > ai[i]) nrows++; 4692 } 4693 len_si[proc] = 2*(nrows+1); 4694 len += len_si[proc]; 4695 } 4696 } 4697 4698 /* determine the number and length of messages to receive for ij-structure */ 4699 /*-------------------------------------------------------------------------*/ 4700 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4701 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4702 4703 /* post the Irecv of j-structure */ 4704 /*-------------------------------*/ 4705 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4706 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4707 4708 /* post the Isend of j-structure */ 4709 /*--------------------------------*/ 4710 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4711 4712 for (proc=0, k=0; proc<size; proc++) { 4713 if (!len_s[proc]) continue; 4714 i = owners[proc]; 4715 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4716 k++; 4717 } 4718 4719 /* receives and sends of j-structure are complete */ 4720 /*------------------------------------------------*/ 4721 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4722 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4723 4724 /* send and recv i-structure */ 4725 /*---------------------------*/ 4726 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4727 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4728 4729 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4730 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4731 for (proc=0,k=0; proc<size; proc++) { 4732 if (!len_s[proc]) continue; 4733 /* form outgoing message for i-structure: 4734 buf_si[0]: nrows to be sent 4735 [1:nrows]: row index (global) 4736 [nrows+1:2*nrows+1]: i-structure index 4737 */ 4738 /*-------------------------------------------*/ 4739 nrows = len_si[proc]/2 - 1; 4740 buf_si_i = buf_si + nrows+1; 4741 buf_si[0] = nrows; 4742 buf_si_i[0] = 0; 4743 nrows = 0; 4744 for (i=owners[proc]; i<owners[proc+1]; i++) { 4745 anzi = ai[i+1] - ai[i]; 4746 if (anzi) { 4747 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4748 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4749 nrows++; 4750 } 4751 } 4752 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4753 k++; 4754 buf_si += len_si[proc]; 4755 } 4756 4757 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4758 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4759 4760 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4761 for (i=0; i<merge->nrecv; i++) { 4762 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4763 } 4764 4765 ierr = PetscFree(len_si);CHKERRQ(ierr); 4766 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4767 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4768 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4769 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4770 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4771 ierr = PetscFree(status);CHKERRQ(ierr); 4772 4773 /* compute a local seq matrix in each processor */ 4774 /*----------------------------------------------*/ 4775 /* allocate bi array and free space for accumulating nonzero column info */ 4776 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4777 bi[0] = 0; 4778 4779 /* create and initialize a linked list */ 4780 nlnk = N+1; 4781 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4782 4783 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4784 len = ai[owners[rank+1]] - ai[owners[rank]]; 4785 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4786 4787 current_space = free_space; 4788 4789 /* determine symbolic info for each local row */ 4790 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4791 4792 for (k=0; k<merge->nrecv; k++) { 4793 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4794 nrows = *buf_ri_k[k]; 4795 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4796 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4797 } 4798 4799 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4800 len = 0; 4801 for (i=0; i<m; i++) { 4802 bnzi = 0; 4803 /* add local non-zero cols of this proc's seqmat into lnk */ 4804 arow = owners[rank] + i; 4805 anzi = ai[arow+1] - ai[arow]; 4806 aj = a->j + ai[arow]; 4807 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4808 bnzi += nlnk; 4809 /* add received col data into lnk */ 4810 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4811 if (i == *nextrow[k]) { /* i-th row */ 4812 anzi = *(nextai[k]+1) - *nextai[k]; 4813 aj = buf_rj[k] + *nextai[k]; 4814 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4815 bnzi += nlnk; 4816 nextrow[k]++; nextai[k]++; 4817 } 4818 } 4819 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4820 4821 /* if free space is not available, make more free space */ 4822 if (current_space->local_remaining<bnzi) { 4823 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4824 nspacedouble++; 4825 } 4826 /* copy data into free space, then initialize lnk */ 4827 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4828 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4829 4830 current_space->array += bnzi; 4831 current_space->local_used += bnzi; 4832 current_space->local_remaining -= bnzi; 4833 4834 bi[i+1] = bi[i] + bnzi; 4835 } 4836 4837 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4838 4839 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4840 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4841 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4842 4843 /* create symbolic parallel matrix B_mpi */ 4844 /*---------------------------------------*/ 4845 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4846 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4847 if (n==PETSC_DECIDE) { 4848 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4849 } else { 4850 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4851 } 4852 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4853 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4854 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4855 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4856 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4857 4858 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4859 B_mpi->assembled = PETSC_FALSE; 4860 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4861 merge->bi = bi; 4862 merge->bj = bj; 4863 merge->buf_ri = buf_ri; 4864 merge->buf_rj = buf_rj; 4865 merge->coi = NULL; 4866 merge->coj = NULL; 4867 merge->owners_co = NULL; 4868 4869 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4870 4871 /* attach the supporting struct to B_mpi for reuse */ 4872 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4873 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4874 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4875 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4876 *mpimat = B_mpi; 4877 4878 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4879 PetscFunctionReturn(0); 4880 } 4881 4882 #undef __FUNCT__ 4883 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4884 /*@C 4885 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential 4886 matrices from each processor 4887 4888 Collective on MPI_Comm 4889 4890 Input Parameters: 4891 + comm - the communicator the parallel matrix will live on 4892 . seqmat - the input sequential matrix (one per process) 4893 . m - number of local rows (or PETSC_DECIDE) 4894 . n - number of local columns (or PETSC_DECIDE) 4895 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4896 4897 Output Parameter: 4898 . mpimat - the parallel matrix generated 4899 4900 Level: advanced 4901 4902 Notes: 4903 The dimensions of the sequential matrix in each processor MUST be the same. 4904 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4905 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4906 @*/ 4907 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4908 { 4909 PetscErrorCode ierr; 4910 PetscMPIInt size; 4911 4912 PetscFunctionBegin; 4913 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4914 if (size == 1) { 4915 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4916 if (scall == MAT_INITIAL_MATRIX) { 4917 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4918 } else { 4919 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4920 } 4921 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4922 PetscFunctionReturn(0); 4923 } 4924 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4925 if (scall == MAT_INITIAL_MATRIX) { 4926 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4927 } 4928 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4929 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4930 PetscFunctionReturn(0); 4931 } 4932 4933 #undef __FUNCT__ 4934 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4935 /*@ 4936 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4937 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4938 with MatGetSize() 4939 4940 Not Collective 4941 4942 Input Parameters: 4943 + A - the matrix 4944 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4946 Output Parameter: 4947 .
A_loc - the local sequential matrix generated 4948 4949 Level: developer 4950 4951 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4952 4953 @*/ 4954 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4955 { 4956 PetscErrorCode ierr; 4957 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4958 Mat_SeqAIJ *mat,*a,*b; 4959 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4960 MatScalar *aa,*ba,*cam; 4961 PetscScalar *ca; 4962 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4963 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4964 PetscBool match; 4965 4966 PetscFunctionBegin; 4967 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4968 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4969 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4970 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4971 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4972 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4973 aa = a->a; ba = b->a; 4974 if (scall == MAT_INITIAL_MATRIX) { 4975 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4976 ci[0] = 0; 4977 for (i=0; i<am; i++) { 4978 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4979 } 4980 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4981 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4982 k = 0; 4983 for (i=0; i<am; i++) { 4984 ncols_o = bi[i+1] - bi[i]; 4985 ncols_d = ai[i+1] - ai[i]; 4986 /* off-diagonal portion of A */ 4987 for (jo=0; jo<ncols_o; jo++) { 4988 col = cmap[*bj]; 4989 if (col >= cstart) break; 4990 cj[k] = col; bj++; 4991 ca[k++] = *ba++; 4992 } 4993 /* diagonal portion of A */ 4994 for (j=0; j<ncols_d; j++) { 4995 cj[k] = cstart + *aj++; 4996 ca[k++] = *aa++; 4997 } 4998 /* off-diagonal portion of A */ 4999 for (j=jo; j<ncols_o; j++) { 5000 cj[k] = cmap[*bj++]; 5001 ca[k++] = *ba++; 5002 } 5003 } 5004 /* put together the new matrix */ 5005 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5006 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5007 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5008 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5009 mat->free_a = PETSC_TRUE; 5010 mat->free_ij = PETSC_TRUE; 5011 mat->nonew = 0; 5012 } else if (scall == MAT_REUSE_MATRIX) { 5013 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5014 ci = mat->i; cj = mat->j; cam = mat->a; 5015 for (i=0; i<am; i++) { 5016 /* off-diagonal portion of A */ 5017 ncols_o = bi[i+1] - bi[i]; 5018 for (jo=0; jo<ncols_o; jo++) { 5019 col = cmap[*bj]; 5020 if (col >= cstart) break; 5021 *cam++ = *ba++; bj++; 5022 } 5023 /* diagonal portion of A */ 5024 ncols_d = ai[i+1] - ai[i]; 5025 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5026 /* off-diagonal portion of A */ 5027 for (j=jo; j<ncols_o; j++) { 5028 *cam++ = *ba++; bj++; 5029 } 5030 } 5031 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5032 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5033 PetscFunctionReturn(0); 5034 } 5035 5036 #undef __FUNCT__ 5037 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5038 /*@C 5039 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5040 5041 Not Collective 5042 5043 Input Parameters: 5044 + A - the matrix 5045 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5046 - row, col - index sets of rows and columns to extract (or NULL) 5047 5048 Output Parameter: 5049 . 
A_loc - the local sequential matrix generated 5050 5051 Level: developer 5052 5053 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5054 5055 @*/ 5056 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5057 { 5058 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5059 PetscErrorCode ierr; 5060 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5061 IS isrowa,iscola; 5062 Mat *aloc; 5063 PetscBool match; 5064 5065 PetscFunctionBegin; 5066 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5067 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5068 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5069 if (!row) { 5070 start = A->rmap->rstart; end = A->rmap->rend; 5071 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5072 } else { 5073 isrowa = *row; 5074 } 5075 if (!col) { 5076 start = A->cmap->rstart; 5077 cmap = a->garray; 5078 nzA = a->A->cmap->n; 5079 nzB = a->B->cmap->n; 5080 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5081 ncols = 0; 5082 for (i=0; i<nzB; i++) { 5083 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5084 else break; 5085 } 5086 imark = i; 5087 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5088 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5089 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5090 } else { 5091 iscola = *col; 5092 } 5093 if (scall != MAT_INITIAL_MATRIX) { 5094 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5095 aloc[0] = *A_loc; 5096 } 5097 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5098 *A_loc = aloc[0]; 5099 ierr = PetscFree(aloc);CHKERRQ(ierr); 5100 if (!row) { 5101 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5102 } 5103 if (!col) { 5104 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5105 } 5106 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5107 PetscFunctionReturn(0); 5108 } 5109 5110 #undef __FUNCT__ 5111 #define __FUNCT__ "MatGetBrowsOfAcols" 5112 /*@C 5113 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5114 5115 Collective on Mat 5116 5117 Input Parameters: 5118 + A,B - the matrices in mpiaij format 5119 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5120 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5121 5122 Output Parameter: 5123 + rowb, colb - index sets of rows and columns of B to extract 5124 - B_seq - the sequential matrix generated 5125 5126 Level: developer 5127 5128 @*/ 5129 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5130 { 5131 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5132 PetscErrorCode ierr; 5133 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5134 IS isrowb,iscolb; 5135 Mat *bseq=NULL; 5136 5137 PetscFunctionBegin; 5138 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5139 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5140 } 5141 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5142 5143 if (scall == MAT_INITIAL_MATRIX) { 5144 start = A->cmap->rstart; 5145 cmap = a->garray; 5146 nzA = a->A->cmap->n; 5147 nzB = a->B->cmap->n; 5148 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5149 ncols = 0; 5150 for (i=0; i<nzB; i++) { /* row < local row index */ 5151 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5152 else break; 5153 } 5154 imark = i; 5155 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5156 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5157 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5158 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5159 } else { 5160 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5161 isrowb = *rowb; iscolb = *colb; 5162 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5163 bseq[0] = *B_seq; 5164 } 5165 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5166 *B_seq = bseq[0]; 5167 ierr = PetscFree(bseq);CHKERRQ(ierr); 5168 if (!rowb) { 5169 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5170 } else { 5171 *rowb = isrowb; 5172 } 5173 if (!colb) { 5174 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5175 } else { 5176 *colb = iscolb; 5177 } 5178 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5179 PetscFunctionReturn(0); 5180 } 5181 5182 #undef __FUNCT__ 5183 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5184 /* 5185 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5186 of the OFF-DIAGONAL portion of local A 5187 5188 Collective on Mat 5189 5190 Input Parameters: 5191 + A,B - the matrices in mpiaij format 5192 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5193 5194 Output Parameter: 5195 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5196 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5197 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5198 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5199 5200 Level: developer 5201 5202 */ 5203 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5204 { 5205 VecScatter_MPI_General *gen_to,*gen_from; 5206 PetscErrorCode ierr; 5207 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5208 Mat_SeqAIJ *b_oth; 5209 VecScatter ctx =a->Mvctx; 5210 MPI_Comm comm; 5211 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5212 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5213 PetscScalar *rvalues,*svalues; 5214 MatScalar *b_otha,*bufa,*bufA; 5215 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5216 MPI_Request *rwaits = NULL,*swaits = NULL; 5217 MPI_Status *sstatus,rstatus; 5218 PetscMPIInt jj; 5219 PetscInt *cols,sbs,rbs; 5220 PetscScalar *vals; 5221 5222 PetscFunctionBegin; 5223 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5224 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5225 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5226 } 5227 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5228 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5229 5230 gen_to = (VecScatter_MPI_General*)ctx->todata; 5231 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5232 rvalues = gen_from->values; /* holds the length of receiving row */ 5233 svalues = gen_to->values; /* holds the length of sending row */ 5234 nrecvs = gen_from->n; 5235 nsends = gen_to->n; 5236 5237 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5238 srow = gen_to->indices; /* local row index to be sent */ 5239 sstarts = gen_to->starts; 5240 sprocs = gen_to->procs; 5241 sstatus = gen_to->sstatus; 5242 sbs = gen_to->bs; 5243 rstarts = gen_from->starts; 5244 rprocs = gen_from->procs; 5245 rbs = gen_from->bs; 5246 5247 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5248 if (scall == MAT_INITIAL_MATRIX) { 5249 /* i-array */ 5250 /*---------*/ 5251 /* post receives */ 5252 for (i=0; i<nrecvs; i++) { 5253 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5254 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5255 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5256 } 5257 5258 /* pack the outgoing message */ 5259 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5260 5261 sstartsj[0] = 0; 5262 rstartsj[0] = 0; 5263 len = 0; /* total length of j or a array to be sent */ 5264 k = 0; 5265 for (i=0; i<nsends; i++) { 5266 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5267 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5268 for (j=0; j<nrows; j++) { 5269 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5270 for (l=0; l<sbs; l++) { 5271 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5272 5273 rowlen[j*sbs+l] = ncols; 5274 5275 len += ncols; 5276 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5277 } 5278 k++; 5279 } 5280 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5281 5282 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5283 } 5284 /* recvs and 
sends of i-array are completed */ 5285 i = nrecvs; 5286 while (i--) { 5287 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5288 } 5289 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5290 5291 /* allocate buffers for sending j and a arrays */ 5292 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5293 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5294 5295 /* create i-array of B_oth */ 5296 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5297 5298 b_othi[0] = 0; 5299 len = 0; /* total length of j or a array to be received */ 5300 k = 0; 5301 for (i=0; i<nrecvs; i++) { 5302 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5303 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5304 for (j=0; j<nrows; j++) { 5305 b_othi[k+1] = b_othi[k] + rowlen[j]; 5306 len += rowlen[j]; k++; 5307 } 5308 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5309 } 5310 5311 /* allocate space for j and a arrrays of B_oth */ 5312 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5313 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5314 5315 /* j-array */ 5316 /*---------*/ 5317 /* post receives of j-array */ 5318 for (i=0; i<nrecvs; i++) { 5319 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5320 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5321 } 5322 5323 /* pack the outgoing message j-array */ 5324 k = 0; 5325 for (i=0; i<nsends; i++) { 5326 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5327 bufJ = bufj+sstartsj[i]; 5328 for (j=0; j<nrows; j++) { 5329 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5330 for (ll=0; ll<sbs; ll++) { 5331 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5332 for (l=0; l<ncols; l++) { 5333 *bufJ++ = cols[l]; 5334 } 5335 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5336 } 5337 } 5338 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5339 } 5340 5341 /* recvs and sends of j-array are completed */ 5342 i = nrecvs; 5343 while (i--) { 5344 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5345 } 5346 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5347 } else if (scall == MAT_REUSE_MATRIX) { 5348 sstartsj = *startsj_s; 5349 rstartsj = *startsj_r; 5350 bufa = *bufa_ptr; 5351 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5352 b_otha = b_oth->a; 5353 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5354 5355 /* a-array */ 5356 /*---------*/ 5357 /* post receives of a-array */ 5358 for (i=0; i<nrecvs; i++) { 5359 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5360 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5361 } 5362 5363 /* pack the outgoing message a-array */ 5364 k = 0; 5365 for (i=0; i<nsends; i++) { 5366 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5367 bufA = bufa+sstartsj[i]; 5368 for (j=0; j<nrows; j++) { 5369 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5370 for (ll=0; ll<sbs; ll++) { 5371 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5372 for (l=0; l<ncols; l++) { 5373 *bufA++ = vals[l]; 5374 } 5375 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5376 } 5377 } 5378 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5379 } 5380 /* recvs and sends of a-array are completed */ 5381 i = nrecvs; 5382 while (i--) { 5383 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5384 } 5385 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5386 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5387 5388 if (scall == MAT_INITIAL_MATRIX) { 5389 /* put together the new matrix */ 5390 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5391 5392 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5393 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5394 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5395 b_oth->free_a = PETSC_TRUE; 5396 b_oth->free_ij = PETSC_TRUE; 5397 b_oth->nonew = 0; 5398 5399 ierr = PetscFree(bufj);CHKERRQ(ierr); 5400 if (!startsj_s || !bufa_ptr) { 5401 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5402 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5403 } else { 5404 *startsj_s = sstartsj; 5405 *startsj_r = rstartsj; 5406 *bufa_ptr = bufa; 5407 } 5408 } 5409 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5410 PetscFunctionReturn(0); 5411 } 5412 5413 #undef __FUNCT__ 5414 #define __FUNCT__ "MatGetCommunicationStructs" 5415 /*@C 5416 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5417 5418 Not Collective 5419 5420 Input Parameters: 5421 . A - The matrix in mpiaij format 5422 5423 Output Parameter: 5424 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5425 . colmap - A map from global column index to local index into lvec 5426 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5427 5428 Level: developer 5429 5430 @*/ 5431 #if defined(PETSC_USE_CTABLE) 5432 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5433 #else 5434 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5435 #endif 5436 { 5437 Mat_MPIAIJ *a; 5438 5439 PetscFunctionBegin; 5440 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5441 PetscValidPointer(lvec, 2); 5442 PetscValidPointer(colmap, 3); 5443 PetscValidPointer(multScatter, 4); 5444 a = (Mat_MPIAIJ*) A->data; 5445 if (lvec) *lvec = a->lvec; 5446 if (colmap) *colmap = a->colmap; 5447 if (multScatter) *multScatter = a->Mvctx; 5448 PetscFunctionReturn(0); 5449 } 5450 5451 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5452 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5453 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5454 5455 #undef __FUNCT__ 5456 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5457 /* 5458 Computes (B'*A')' since computing B*A directly is untenable 5459 5460 n p p 5461 ( ) ( ) ( ) 5462 m ( A ) * n ( B ) = m ( C ) 5463 ( ) ( ) ( ) 5464 5465 */ 5466 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5467 { 5468 PetscErrorCode ierr; 5469 Mat At,Bt,Ct; 5470 5471 PetscFunctionBegin; 5472 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5473 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5474 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5475 ierr = MatDestroy(&At);CHKERRQ(ierr); 
5476 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5477 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5478 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5479 PetscFunctionReturn(0); 5480 } 5481 5482 #undef __FUNCT__ 5483 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5484 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5485 { 5486 PetscErrorCode ierr; 5487 PetscInt m=A->rmap->n,n=B->cmap->n; 5488 Mat Cmat; 5489 5490 PetscFunctionBegin; 5491 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5492 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5493 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5494 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5495 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5496 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5497 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5498 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5499 5500 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5501 5502 *C = Cmat; 5503 PetscFunctionReturn(0); 5504 } 5505 5506 /* ----------------------------------------------------------------*/ 5507 #undef __FUNCT__ 5508 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5509 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5510 { 5511 PetscErrorCode ierr; 5512 5513 PetscFunctionBegin; 5514 if (scall == MAT_INITIAL_MATRIX) { 5515 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5516 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5517 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5518 } 5519 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5520 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5521 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5522 PetscFunctionReturn(0); 5523 } 5524 5525 #if defined(PETSC_HAVE_MUMPS) 5526 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5527 #endif 5528 #if defined(PETSC_HAVE_PASTIX) 5529 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5530 #endif 5531 #if defined(PETSC_HAVE_SUPERLU_DIST) 5532 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5533 #endif 5534 #if defined(PETSC_HAVE_CLIQUE) 5535 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5536 #endif 5537 5538 /*MC 5539 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5540 5541 Options Database Keys: 5542 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5543 5544 Level: beginner 5545 5546 .seealso: MatCreateAIJ() 5547 M*/ 5548 5549 #undef __FUNCT__ 5550 #define __FUNCT__ "MatCreate_MPIAIJ" 5551 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5552 { 5553 Mat_MPIAIJ *b; 5554 PetscErrorCode ierr; 5555 PetscMPIInt size; 5556 5557 PetscFunctionBegin; 5558 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5559 5560 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5561 B->data = (void*)b; 5562 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5563 B->assembled = PETSC_FALSE; 5564 B->insertmode = NOT_SET_VALUES; 5565 b->size = size; 5566 5567 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5568 5569 /* build cache for off array entries formed */ 5570 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5571 5572 b->donotstash = PETSC_FALSE; 5573 b->colmap = 0; 5574 b->garray = 0; 5575 b->roworiented = PETSC_TRUE; 5576 5577 /* stuff used for matrix vector multiply */ 5578 b->lvec = NULL; 5579 b->Mvctx = NULL; 5580 5581 /* stuff for MatGetRow() */ 5582 b->rowindices = 0; 5583 b->rowvalues = 0; 5584 b->getrowactive = PETSC_FALSE; 5585 5586 /* flexible pointer used in CUSP/CUSPARSE classes */ 5587 b->spptr = NULL; 5588 5589 #if defined(PETSC_HAVE_MUMPS) 5590 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5591 #endif 5592 #if defined(PETSC_HAVE_PASTIX) 5593 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5594 #endif 5595 #if defined(PETSC_HAVE_SUPERLU_DIST) 5596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5597 #endif 5598 #if defined(PETSC_HAVE_CLIQUE) 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5600 #endif 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5612 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5615 PetscFunctionReturn(0); 5616 } 5617 5618 #undef __FUNCT__ 5619 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5620 /*@ 5621 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5622 and "off-diagonal" parts of the matrix in CSR format. 5623 5624 Collective on MPI_Comm 5625 5626 Input Parameters: 5627 + comm - MPI communicator 5628 . m - number of local rows (Cannot be PETSC_DECIDE) 5629 . n - number of local columns. This value should be the same as the local size used in creating the 5630 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 5631 it calculated if N is given). For square matrices n is almost always m. 5632 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5633 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5634 . i - row indices for "diagonal" portion of matrix 5635 . j - column indices 5636 . a - matrix values 5637 . oi - row indices for "off-diagonal" portion of matrix 5638 . oj - column indices 5639 - oa - matrix values 5640 5641 Output Parameter: 5642 . mat - the matrix 5643 5644 Level: advanced 5645 5646 Notes: 5647 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5648 must free the arrays once the matrix has been destroyed and not before. 5649 5650 The i and j indices are 0-based. 5651 5652 See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix 5653 5654 This sets local rows and cannot be used to set off-processor values. 5655 5656 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5657 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5658 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5659 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5660 keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5661 communication if it is known that only local entries will be set.
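   Example usage (an illustrative sketch, not taken from an existing PETSc example): the arrays below are
   what the second of two processes might pass when it owns two rows and two columns, with one diagonal
   entry per row and a single off-diagonal entry; the off-diagonal column index is given in global
   numbering, matching the global column dimension this routine uses for the off-diagonal block. Error
   checking is omitted.

      PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
      PetscScalar a[]  = {4.0,4.0};
      PetscInt    oi[] = {0,1,1},  oj[] = {0};
      PetscScalar oa[] = {-1.0};
      Mat         A;

      MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);

   As the notes above recommend, prefer MatCreateAIJ() and MatSetValues() unless the application already
   holds its data in exactly this split format.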
5662 5663 .keywords: matrix, aij, compressed row, sparse, parallel 5664 5665 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5666 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5667 @*/ 5668 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5669 { 5670 PetscErrorCode ierr; 5671 Mat_MPIAIJ *maij; 5672 5673 PetscFunctionBegin; 5674 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5675 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5676 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5677 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5678 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5679 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5680 maij = (Mat_MPIAIJ*) (*mat)->data; 5681 5682 (*mat)->preallocated = PETSC_TRUE; 5683 5684 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5685 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5686 5687 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5688 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5689 5690 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5691 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5692 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5693 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5694 5695 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5696 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5697 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5698 PetscFunctionReturn(0); 5699 } 5700 5701 /* 5702 Special version for direct calls from Fortran 5703 */ 5704 #include <petsc-private/fortranimpl.h> 5705 5706 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5707 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5708 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5709 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5710 #endif 5711 5712 /* Change these macros so can be used in void function */ 5713 #undef CHKERRQ 5714 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5715 #undef SETERRQ2 5716 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5717 #undef SETERRQ3 5718 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5719 #undef SETERRQ 5720 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5721 5722 #undef __FUNCT__ 5723 #define __FUNCT__ "matsetvaluesmpiaij_" 5724 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5725 { 5726 Mat mat = *mmat; 5727 PetscInt m = *mm, n = *mn; 5728 InsertMode addv = *maddv; 5729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5730 PetscScalar value; 5731 PetscErrorCode ierr; 5732 5733 MatCheckPreallocated(mat,1); 5734 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5735 5736 #if defined(PETSC_USE_DEBUG) 5737 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5738 #endif 5739 { 5740 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5741 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5742 PetscBool roworiented = aij->roworiented; 5743 5744 /* Some Variables required in the macro */ 5745 Mat A = aij->A; 5746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5747 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5748 MatScalar *aa = a->a; 5749 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5750 Mat B = aij->B; 5751 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5752 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5753 MatScalar *ba = b->a; 5754 5755 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5756 PetscInt nonew = a->nonew; 5757 MatScalar *ap1,*ap2; 5758 5759 PetscFunctionBegin; 5760 for (i=0; i<m; i++) { 5761 if (im[i] < 0) continue; 5762 #if defined(PETSC_USE_DEBUG) 5763 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5764 #endif 5765 if (im[i] >= rstart && im[i] < rend) { 5766 row = im[i] - rstart; 5767 lastcol1 = -1; 5768 rp1 = aj + ai[row]; 5769 ap1 = aa + ai[row]; 5770 rmax1 = aimax[row]; 5771 nrow1 = ailen[row]; 5772 low1 = 0; 5773 high1 = nrow1; 5774 lastcol2 = -1; 5775 rp2 = bj + bi[row]; 5776 ap2 = ba + bi[row]; 5777 rmax2 = bimax[row]; 5778 nrow2 = bilen[row]; 5779 low2 = 0; 5780 high2 = nrow2; 5781 5782 for (j=0; j<n; j++) { 5783 if (roworiented) value = v[i*n+j]; 5784 else value = v[i+j*m]; 5785 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5786 if (in[j] >= cstart && in[j] < cend) { 5787 col = in[j] - cstart; 5788 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5789 } else if (in[j] < 0) continue; 5790 #if defined(PETSC_USE_DEBUG) 5791 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5792 #endif 5793 else { 5794 if (mat->was_assembled) { 5795 if (!aij->colmap) { 5796 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5797 } 5798 #if defined(PETSC_USE_CTABLE) 5799 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5800 col--; 5801 #else 5802 col = aij->colmap[in[j]] - 1; 5803 #endif 5804 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5805 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5806 col = in[j]; 5807 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5808 B = aij->B; 5809 b = (Mat_SeqAIJ*)B->data; 5810 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5811 rp2 = bj + bi[row]; 5812 ap2 = ba + bi[row]; 5813 rmax2 = bimax[row]; 5814 nrow2 = bilen[row]; 5815 low2 = 0; 5816 high2 = nrow2; 5817 bm = aij->B->rmap->n; 5818 ba = b->a; 5819 } 5820 } else col = in[j]; 5821 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5822 } 5823 } 5824 } else if (!aij->donotstash) { 5825 if (roworiented) { 5826 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5827 } else { 5828 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5829 } 5830 } 5831 } 5832 } 5833 PetscFunctionReturnVoid(); 5834 } 5835 5836