1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
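   A typical call sequence is, roughly (a sketch only; here m is this process's number of local rows, and, per the checks below, gmat's type and entries only matter on process 0):
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&inmat);CHKERRQ(ierr);
   A later call with MAT_REUSE_MATRIX only moves updated numerical values from process 0 into the previously created inmat.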
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if 
(gmataj[jj] < rstart) ld[i]++; 265 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 266 jj++; 267 } 268 } 269 /* receive numerical values */ 270 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 272 } 273 /* set preallocation */ 274 for (i=0; i<m; i++) { 275 dlens[i] -= olens[i]; 276 } 277 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 278 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 279 280 for (i=0; i<m; i++) { 281 dlens[i] += olens[i]; 282 } 283 cnt = 0; 284 for (i=0; i<m; i++) { 285 row = rstart + i; 286 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 287 cnt += dlens[i]; 288 } 289 if (rank) { 290 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 291 } 292 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 293 ierr = PetscFree(rowners);CHKERRQ(ierr); 294 295 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 296 297 *inmat = mat; 298 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 299 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 300 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 301 mat = *inmat; 302 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 303 if (!rank) { 304 /* send numerical values to other processes */ 305 gmata = (Mat_SeqAIJ*) gmat->data; 306 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 307 gmataa = gmata->a; 308 for (i=1; i<size; i++) { 309 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 310 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 311 } 312 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 313 } else { 314 /* receive numerical values from process 0 */ 315 nz = Ad->nz + Ao->nz; 316 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 317 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 318 } 319 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 320 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 321 ad = Ad->a; 322 ao = Ao->a; 323 if (mat->rmap->n) { 324 i = 0; 325 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 326 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 327 } 328 for (i=1; i<mat->rmap->n; i++) { 329 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 330 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 331 } 332 i--; 333 if (mat->rmap->n) { 334 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 if (rank) { 337 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 338 } 339 } 340 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 341 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* 346 Local utility routine that creates a mapping from the global column 347 number to the local number in the off-diagonal part of the local 348 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 349 a slightly higher hash table cost; without it, it is not scalable (each processor 350 has an order N integer array, but access is fast).
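   As a sketch (mirroring the lookups done later in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()), a global column index gcol (a hypothetical name used only for illustration) is translated to a local column of the off-diagonal block B by
      #if defined(PETSC_USE_CTABLE)
        ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr); col--;
      #else
        col = aij->colmap[gcol] - 1;
      #endif
   where col == -1 indicates that gcol does not occur in the off-diagonal part on this process.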
351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 404 ap1[_i] = value; \ 405 A->nonzerostate++;\ 406 a_noinsert: ; \ 407 ailen[row] = nrow1; \ 408 } 409 410 411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 412 { \ 413 if (col <= lastcol2) low2 = 0; \ 414 else high2 = nrow2; \ 415 lastcol2 = col; \ 416 while (high2-low2 > 5) { \ 417 t = (low2+high2)/2; \ 418 if (rp2[t] > col) high2 = t; \ 419 else low2 = t; \ 420 } \ 421 for (_i=low2; _i<high2; _i++) { \ 422 if (rp2[_i] > col) break; \ 423 if (rp2[_i] == col) { \ 424 if (addv == ADD_VALUES) ap2[_i] += value; \ 425 else ap2[_i] = value; \ 426 goto b_noinsert; \ 427 } \ 428 } \ 429 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 432 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 433 N = nrow2++ - 1; b->nz++; high2++; \ 434 /* shift up all the later entries in this row */ \ 435 for (ii=N; ii>=_i; ii--) { \ 436 rp2[ii+1] = rp2[ii]; \ 437 ap2[ii+1] = ap2[ii]; \ 438 } \ 439 rp2[_i] = col; \ 440 ap2[_i] = value; \ 441 B->nonzerostate++; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
(Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 for (i=0; i<m; i++) { 502 if (im[i] < 0) continue; 503 #if defined(PETSC_USE_DEBUG) 504 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 505 #endif 506 if (im[i] >= rstart && im[i] < rend) { 507 row = im[i] - rstart; 508 lastcol1 = -1; 509 rp1 = aj + ai[row]; 510 ap1 = aa + ai[row]; 511 rmax1 = aimax[row]; 512 nrow1 = ailen[row]; 513 low1 = 0; 514 high1 = nrow1; 515 lastcol2 = -1; 516 rp2 = bj + bi[row]; 517 ap2 = ba + bi[row]; 518 rmax2 = bimax[row]; 519 nrow2 = bilen[row]; 520 low2 = 0; 521 high2 = nrow2; 522 523 for (j=0; j<n; j++) { 524 if (roworiented) value = v[i*n+j]; 525 else value = v[i+j*m]; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend) { 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 } 540 #if defined(PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if (roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend) { 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined(PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col--; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 PetscFunctionReturn(0); 622 } 623 624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt nstash,reallocs; 633 InsertMode addv; 634 635 PetscFunctionBegin; 636 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 637 638 /* make sure all processors are either in INSERTMODE or 
ADDMODE */ 639 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added"); 641 mat->insertmode = addv; /* in case this processor had no cache */ 642 643 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 644 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 645 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 652 { 653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 654 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 655 PetscErrorCode ierr; 656 PetscMPIInt n; 657 PetscInt i,j,rstart,ncols,flg; 658 PetscInt *row,*col; 659 PetscBool other_disassembled; 660 PetscScalar *val; 661 InsertMode addv = mat->insertmode; 662 663 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 664 665 PetscFunctionBegin; 666 if (!aij->donotstash && !mat->nooffprocentries) { 667 while (1) { 668 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 669 if (!flg) break; 670 671 for (i=0; i<n; ) { 672 /* Now identify the consecutive vals belonging to the same row */ 673 for (j=i,rstart=row[j]; j<n; j++) { 674 if (row[j] != rstart) break; 675 } 676 if (j < n) ncols = j-i; 677 else ncols = n-i; 678 /* Now assemble all these values with a single function call */ 679 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 680 681 i = j; 682 } 683 } 684 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 685 } 686 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 687 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 688 689 /* determine if any processor has disassembled; if so, we must 690 also disassemble ourselves, in order that we may reassemble.
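   (Here "disassemble" refers to MatDisAssemble_MPIAIJ(), which maps the off-diagonal block B back to global column numbering so that new nonzero locations can be accepted; the MPI_Allreduce() below is how all processes agree on whether this is necessary.)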
*/ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 707 708 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 709 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 719 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 720 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 721 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 722 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 730 { 731 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 736 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatZeroRows_MPIAIJ" 742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 743 { 744 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 745 PetscInt *owners = A->rmap->range; 746 PetscInt n = A->rmap->n; 747 PetscSF sf; 748 PetscInt *lrows; 749 PetscSFNode *rrows; 750 PetscInt r, p = 0, len = 0; 751 PetscErrorCode ierr; 752 753 PetscFunctionBegin; 754 /* Create SF where leaves are input rows and roots are owned rows */ 755 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 756 for (r = 0; r < n; ++r) lrows[r] = -1; 757 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 758 for (r = 0; r < N; ++r) { 759 const PetscInt idx = rows[r]; 760 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 761 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 762 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 763 } 764 if (A->nooffproczerorows) { 765 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 766 lrows[len++] = idx - owners[p]; 767 } else { 768 rrows[r].rank = p; 769 rrows[r].index = rows[r] - owners[p]; 770 } 771 } 772 if (!A->nooffproczerorows) { 773 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 774 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, 
PETSC_OWN_POINTER);CHKERRQ(ierr); 775 /* Collect flags for rows to be zeroed */ 776 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 777 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 778 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 779 /* Compress and put in row numbers */ 780 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 781 } 782 /* fix right hand side if needed */ 783 if (x && b) { 784 const PetscScalar *xx; 785 PetscScalar *bb; 786 787 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 788 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 789 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 790 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 791 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 792 } 793 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 794 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 795 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 796 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 797 } else if (diag != 0.0) { 798 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 799 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 800 for (r = 0; r < len; ++r) { 801 const PetscInt row = lrows[r] + A->rmap->rstart; 802 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 803 } 804 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 805 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 } else { 807 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 808 } 809 ierr = PetscFree(lrows);CHKERRQ(ierr); 810 811 /* only change matrix nonzero state if pattern was allowed to be changed */ 812 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 813 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 814 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 822 { 823 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 824 PetscErrorCode ierr; 825 PetscMPIInt n = A->rmap->n; 826 PetscInt i,j,r,m,p = 0,len = 0; 827 PetscInt *lrows,*owners = A->rmap->range; 828 PetscSFNode *rrows; 829 PetscSF sf; 830 const PetscScalar *xx; 831 PetscScalar *bb,*mask; 832 Vec xmask,lmask; 833 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 834 const PetscInt *aj, *ii,*ridx; 835 PetscScalar *aa; 836 837 PetscFunctionBegin; 838 /* Create SF where leaves are input rows and roots are owned rows */ 839 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 840 for (r = 0; r < n; ++r) lrows[r] = -1; 841 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 842 for (r = 0; r < N; ++r) { 843 const PetscInt idx = rows[r]; 844 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 845 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 846 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 847 } 848 rrows[r].rank = p; 849 rrows[r].index = rows[r] - 
owners[p]; 850 } 851 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 852 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 853 /* Collect flags for rows to be zeroed */ 854 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 856 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 857 /* Compress and put in row numbers */ 858 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 859 /* zero diagonal part of matrix */ 860 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 861 /* handle off diagonal part of matrix */ 862 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 863 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 864 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 865 for (i=0; i<len; i++) bb[lrows[i]] = 1; 866 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 867 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 870 if (x) { 871 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 874 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 875 } 876 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 877 /* remove zeroed rows of off diagonal matrix */ 878 ii = aij->i; 879 for (i=0; i<len; i++) { 880 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 881 } 882 /* loop over all elements of off process part of matrix zeroing removed columns*/ 883 if (aij->compressedrow.use) { 884 m = aij->compressedrow.nrows; 885 ii = aij->compressedrow.i; 886 ridx = aij->compressedrow.rindex; 887 for (i=0; i<m; i++) { 888 n = ii[i+1] - ii[i]; 889 aj = aij->j + ii[i]; 890 aa = aij->a + ii[i]; 891 892 for (j=0; j<n; j++) { 893 if (PetscAbsScalar(mask[*aj])) { 894 if (b) bb[*ridx] -= *aa*xx[*aj]; 895 *aa = 0.0; 896 } 897 aa++; 898 aj++; 899 } 900 ridx++; 901 } 902 } else { /* do not use compressed row format */ 903 m = l->B->rmap->n; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 for (j=0; j<n; j++) { 909 if (PetscAbsScalar(mask[*aj])) { 910 if (b) bb[i] -= *aa*xx[*aj]; 911 *aa = 0.0; 912 } 913 aa++; 914 aj++; 915 } 916 } 917 } 918 if (x) { 919 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 920 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 921 } 922 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 923 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 924 ierr = PetscFree(lrows);CHKERRQ(ierr); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 928 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 929 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 930 } 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatMult_MPIAIJ" 936 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 937 { 938 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 939 PetscErrorCode ierr; 940 PetscInt nt; 941 942 PetscFunctionBegin; 943 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 944 if (nt != 
A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 945 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 947 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 954 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 955 { 956 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatMultAdd_MPIAIJ" 966 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 PetscErrorCode ierr; 970 971 PetscFunctionBegin; 972 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 973 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 974 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 981 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 982 { 983 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 984 PetscErrorCode ierr; 985 PetscBool merged; 986 987 PetscFunctionBegin; 988 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 989 /* do nondiagonal part */ 990 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 991 if (!merged) { 992 /* send it on its way */ 993 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 994 /* do local part */ 995 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 996 /* receive remote parts: note this assumes the values are not actually */ 997 /* added in yy until the next line, */ 998 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 999 } else { 1000 /* do local part */ 1001 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1002 /* send it on its way */ 1003 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1004 /* values actually were received in the Begin() but we need to call this nop */ 1005 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1006 } 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1012 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1013 { 1014 MPI_Comm comm; 1015 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1016 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1017 IS Me,Notme; 1018 PetscErrorCode ierr; 1019 PetscInt M,N,first,last,*notme,i; 1020 PetscMPIInt size; 1021 1022 PetscFunctionBegin; 1023 /* Easy test: symmetric diagonal block */ 1024 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1025 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1026 if (!*f) PetscFunctionReturn(0); 1027 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1028 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1029 if (size == 1) PetscFunctionReturn(0); 1030 1031 /* Hard test: off-diagonal block. 
This takes a MatGetSubMatrix. */ 1032 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1033 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1034 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1035 for (i=0; i<first; i++) notme[i] = i; 1036 for (i=last; i<M; i++) notme[i-last+first] = i; 1037 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1038 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1039 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1040 Aoff = Aoffs[0]; 1041 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1042 Boff = Boffs[0]; 1043 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1044 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1045 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1046 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1047 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1048 ierr = PetscFree(notme);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1055 { 1056 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 /* do nondiagonal part */ 1061 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1062 /* send it on its way */ 1063 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1064 /* do local part */ 1065 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1066 /* receive remote parts */ 1067 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1068 PetscFunctionReturn(0); 1069 } 1070 1071 /* 1072 This only works correctly for square matrices where the subblock A->A is the 1073 diagonal block 1074 */ 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1077 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1078 { 1079 PetscErrorCode ierr; 1080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1081 1082 PetscFunctionBegin; 1083 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1084 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1085 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatScale_MPIAIJ" 1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1092 { 1093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1098 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatDestroy_Redundant" 1104 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant) 1105 { 1106 PetscErrorCode ierr; 1107 Mat_Redundant *redund = *redundant; 1108 PetscInt i; 1109 1110 PetscFunctionBegin; 1111 *redundant = NULL; 1112 if (redund){ 1113 if (redund->matseq) { /* via MatGetSubMatrices() */ 1114 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 1115 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 1116 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 1117 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 1118 } else { 1119 ierr = 
PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 1120 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1121 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1122 for (i=0; i<redund->nrecvs; i++) { 1123 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1124 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1125 } 1126 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1127 } 1128 1129 if (redund->psubcomm) { 1130 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 1131 } 1132 ierr = PetscFree(redund);CHKERRQ(ierr); 1133 } 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatDestroy_MPIAIJ" 1139 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1140 { 1141 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 #if defined(PETSC_USE_LOG) 1146 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1147 #endif 1148 ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr); 1149 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1150 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1151 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1152 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1153 #if defined(PETSC_USE_CTABLE) 1154 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1155 #else 1156 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1157 #endif 1158 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1159 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1160 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1161 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1162 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1163 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1164 1165 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1180 { 1181 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1182 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1183 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1184 PetscErrorCode ierr; 1185 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1186 int fd; 1187 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1188 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1189 PetscScalar *column_values; 1190 PetscInt message_count,flowcontrolcount; 1191 FILE *file; 1192 1193 PetscFunctionBegin; 1194 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1195 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1196 nz = A->nz + 
B->nz; 1197 if (!rank) { 1198 header[0] = MAT_FILE_CLASSID; 1199 header[1] = mat->rmap->N; 1200 header[2] = mat->cmap->N; 1201 1202 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1203 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1204 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1205 /* get largest number of rows any processor has */ 1206 rlen = mat->rmap->n; 1207 range = mat->rmap->range; 1208 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1209 } else { 1210 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1211 rlen = mat->rmap->n; 1212 } 1213 1214 /* load up the local row counts */ 1215 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1216 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1217 1218 /* store the row lengths to the file */ 1219 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1220 if (!rank) { 1221 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1222 for (i=1; i<size; i++) { 1223 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1224 rlen = range[i+1] - range[i]; 1225 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1226 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1227 } 1228 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1229 } else { 1230 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1231 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1233 } 1234 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1235 1236 /* load up the local column indices */ 1237 nzmax = nz; /* process 0 needs a buffer as large as the largest number of nonzeros on any process */ 1238 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1240 cnt = 0; 1241 for (i=0; i<mat->rmap->n; i++) { 1242 for (j=B->i[i]; j<B->i[i+1]; j++) { 1243 if ((col = garray[B->j[j]]) > cstart) break; 1244 column_indices[cnt++] = col; 1245 } 1246 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1247 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1248 } 1249 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1250 1251 /* store the column indices to the file */ 1252 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1253 if (!rank) { 1254 MPI_Status status; 1255 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1256 for (i=1; i<size; i++) { 1257 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1258 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1259 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax); 1260 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1261 ierr = 
PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1262 } 1263 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1264 } else { 1265 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1266 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1267 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1268 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1269 } 1270 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1271 1272 /* load up the local column values */ 1273 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1274 cnt = 0; 1275 for (i=0; i<mat->rmap->n; i++) { 1276 for (j=B->i[i]; j<B->i[i+1]; j++) { 1277 if (garray[B->j[j]] > cstart) break; 1278 column_values[cnt++] = B->a[j]; 1279 } 1280 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1281 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1282 } 1283 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1284 1285 /* store the column values to the file */ 1286 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1287 if (!rank) { 1288 MPI_Status status; 1289 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1290 for (i=1; i<size; i++) { 1291 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1292 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1293 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1294 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1295 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1296 } 1297 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1298 } else { 1299 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1300 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1301 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1302 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1303 } 1304 ierr = PetscFree(column_values);CHKERRQ(ierr); 1305 1306 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1307 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #include <petscdraw.h> 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1314 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1315 { 1316 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1317 PetscErrorCode ierr; 1318 PetscMPIInt rank = aij->rank,size = aij->size; 1319 PetscBool isdraw,iascii,isbinary; 1320 PetscViewer sviewer; 1321 PetscViewerFormat format; 1322 1323 PetscFunctionBegin; 1324 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1325 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1326 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1327 if (iascii) { 1328 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1329 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1330 MatInfo info; 1331 PetscBool inodes; 1332 1333 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1334 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1335 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1336 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1337 if (!inodes) { 1338 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1339 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1340 } else { 1341 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1342 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1343 } 1344 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1345 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1346 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1347 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1348 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1349 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1350 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1351 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1354 PetscInt inodecount,inodelimit,*inodes; 1355 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1356 if (inodes) { 1357 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1358 } else { 1359 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1360 } 1361 PetscFunctionReturn(0); 1362 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1363 PetscFunctionReturn(0); 1364 } 1365 } else if (isbinary) { 1366 if (size == 1) { 1367 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1368 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1369 } else { 1370 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1371 } 1372 PetscFunctionReturn(0); 1373 } else if (isdraw) { 1374 PetscDraw draw; 1375 PetscBool isnull; 1376 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1377 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1378 } 1379 1380 { 1381 /* assemble the entire matrix onto first processor. 
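      Note: this gathers a copy of the entire matrix onto process 0 purely for viewing, so it is not memory scalable and is only intended for the ASCII and draw paths on modest-sized matrices.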
*/ 1382 Mat A; 1383 Mat_SeqAIJ *Aloc; 1384 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1385 MatScalar *a; 1386 1387 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1388 if (!rank) { 1389 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1390 } else { 1391 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1392 } 1393 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1394 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1395 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1396 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1397 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1398 1399 /* copy over the A part */ 1400 Aloc = (Mat_SeqAIJ*)aij->A->data; 1401 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1402 row = mat->rmap->rstart; 1403 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1404 for (i=0; i<m; i++) { 1405 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1406 row++; 1407 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1408 } 1409 aj = Aloc->j; 1410 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1411 1412 /* copy over the B part */ 1413 Aloc = (Mat_SeqAIJ*)aij->B->data; 1414 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1415 row = mat->rmap->rstart; 1416 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1417 ct = cols; 1418 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1419 for (i=0; i<m; i++) { 1420 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1421 row++; 1422 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1423 } 1424 ierr = PetscFree(ct);CHKERRQ(ierr); 1425 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1426 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1427 /* 1428 Everyone has to call to draw the matrix since the graphics waits are 1429 synchronized across all processors that share the PetscDraw object 1430 */ 1431 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1432 if (!rank) { 1433 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1434 } 1435 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1436 ierr = MatDestroy(&A);CHKERRQ(ierr); 1437 } 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatView_MPIAIJ" 1443 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1444 { 1445 PetscErrorCode ierr; 1446 PetscBool iascii,isdraw,issocket,isbinary; 1447 1448 PetscFunctionBegin; 1449 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1450 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1451 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1452 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1453 if (iascii || isdraw || isbinary || issocket) { 1454 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatSOR_MPIAIJ" 1461 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1462 { 1463 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1464 PetscErrorCode ierr; 1465 Vec bb1 = 0; 1466 PetscBool hasop; 1467 1468 PetscFunctionBegin; 1469 if (flag == SOR_APPLY_UPPER) { 1470 ierr 
= (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1475 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1476 } 1477 1478 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1479 if (flag & SOR_ZERO_INITIAL_GUESS) { 1480 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1481 its--; 1482 } 1483 1484 while (its--) { 1485 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1486 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 1488 /* update rhs: bb1 = bb - B*x */ 1489 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1490 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1491 1492 /* local sweep */ 1493 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1494 } 1495 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1496 if (flag & SOR_ZERO_INITIAL_GUESS) { 1497 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1498 its--; 1499 } 1500 while (its--) { 1501 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1502 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 1504 /* update rhs: bb1 = bb - B*x */ 1505 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1506 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1507 1508 /* local sweep */ 1509 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1510 } 1511 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1512 if (flag & SOR_ZERO_INITIAL_GUESS) { 1513 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1514 its--; 1515 } 1516 while (its--) { 1517 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1518 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 1520 /* update rhs: bb1 = bb - B*x */ 1521 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1522 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1523 1524 /* local sweep */ 1525 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1526 } 1527 } else if (flag & SOR_EISENSTAT) { 1528 Vec xx1; 1529 1530 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1531 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1532 1533 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1534 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1535 if (!mat->diag) { 1536 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1537 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1538 } 1539 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1540 if (hasop) { 1541 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1542 } else { 1543 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1544 } 1545 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1546 1547 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1548 1549 /* local sweep */ 1550 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1551 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1552 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1553 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1554 1555 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatPermute_MPIAIJ" 1561 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1562 { 1563 Mat aA,aB,Aperm; 1564 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1565 PetscScalar *aa,*ba; 1566 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1567 PetscSF rowsf,sf; 1568 IS parcolp = NULL; 1569 PetscBool done; 1570 PetscErrorCode ierr; 1571 1572 PetscFunctionBegin; 1573 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1574 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1575 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1576 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1577 1578 /* Invert row permutation to find out where my rows should go */ 1579 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1580 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1581 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1582 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1583 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1585 1586 /* Invert column permutation to find out where my columns should go */ 1587 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1588 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1589 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1590 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1591 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1592 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1593 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1594 1595 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1596 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1597 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1598 1599 /* Find out where my gcols should go */ 1600 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1601 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1602 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1603 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1604 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1605 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1606 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1607 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1608 1609 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1610 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1611 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1612 for (i=0; i<m; i++) { 1613 PetscInt row = rdest[i],rowner; 1614 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1615 for (j=ai[i]; j<ai[i+1]; j++) { 1616 PetscInt cowner,col = cdest[aj[j]]; 1617 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1618 if (rowner == cowner) dnnz[i]++; 1619 
else onnz[i]++; 1620 } 1621 for (j=bi[i]; j<bi[i+1]; j++) { 1622 PetscInt cowner,col = gcdest[bj[j]]; 1623 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1624 if (rowner == cowner) dnnz[i]++; 1625 else onnz[i]++; 1626 } 1627 } 1628 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1629 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1630 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1631 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1632 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1633 1634 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1635 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1636 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1637 for (i=0; i<m; i++) { 1638 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1639 PetscInt j0,rowlen; 1640 rowlen = ai[i+1] - ai[i]; 1641 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1642 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1643 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1644 } 1645 rowlen = bi[i+1] - bi[i]; 1646 for (j0=j=0; j<rowlen; j0=j) { 1647 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1648 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1649 } 1650 } 1651 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1652 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1653 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1654 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1655 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1656 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1657 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1658 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1659 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1660 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1661 *B = Aperm; 1662 PetscFunctionReturn(0); 1663 } 1664 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1667 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1668 { 1669 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1670 Mat A = mat->A,B = mat->B; 1671 PetscErrorCode ierr; 1672 PetscReal isend[5],irecv[5]; 1673 1674 PetscFunctionBegin; 1675 info->block_size = 1.0; 1676 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1677 1678 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1679 isend[3] = info->memory; isend[4] = info->mallocs; 1680 1681 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1682 1683 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1684 isend[3] += info->memory; isend[4] += info->mallocs; 1685 if (flag == MAT_LOCAL) { 1686 info->nz_used = isend[0]; 1687 info->nz_allocated = isend[1]; 1688 info->nz_unneeded = isend[2]; 1689 info->memory = isend[3]; 1690 info->mallocs = isend[4]; 1691 } else if (flag == MAT_GLOBAL_MAX) { 1692 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1693 1694 info->nz_used = irecv[0]; 1695 info->nz_allocated = irecv[1]; 1696 info->nz_unneeded = irecv[2]; 1697 info->memory = irecv[3]; 1698 info->mallocs = irecv[4]; 1699 } else 
if (flag == MAT_GLOBAL_SUM) { 1700 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1701 1702 info->nz_used = irecv[0]; 1703 info->nz_allocated = irecv[1]; 1704 info->nz_unneeded = irecv[2]; 1705 info->memory = irecv[3]; 1706 info->mallocs = irecv[4]; 1707 } 1708 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1709 info->fill_ratio_needed = 0; 1710 info->factor_mallocs = 0; 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatSetOption_MPIAIJ" 1716 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1717 { 1718 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1719 PetscErrorCode ierr; 1720 1721 PetscFunctionBegin; 1722 switch (op) { 1723 case MAT_NEW_NONZERO_LOCATIONS: 1724 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1725 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1726 case MAT_KEEP_NONZERO_PATTERN: 1727 case MAT_NEW_NONZERO_LOCATION_ERR: 1728 case MAT_USE_INODES: 1729 case MAT_IGNORE_ZERO_ENTRIES: 1730 MatCheckPreallocated(A,1); 1731 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1732 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1733 break; 1734 case MAT_ROW_ORIENTED: 1735 a->roworiented = flg; 1736 1737 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1738 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1739 break; 1740 case MAT_NEW_DIAGONALS: 1741 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1742 break; 1743 case MAT_IGNORE_OFF_PROC_ENTRIES: 1744 a->donotstash = flg; 1745 break; 1746 case MAT_SPD: 1747 A->spd_set = PETSC_TRUE; 1748 A->spd = flg; 1749 if (flg) { 1750 A->symmetric = PETSC_TRUE; 1751 A->structurally_symmetric = PETSC_TRUE; 1752 A->symmetric_set = PETSC_TRUE; 1753 A->structurally_symmetric_set = PETSC_TRUE; 1754 } 1755 break; 1756 case MAT_SYMMETRIC: 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 break; 1759 case MAT_STRUCTURALLY_SYMMETRIC: 1760 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1761 break; 1762 case MAT_HERMITIAN: 1763 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1764 break; 1765 case MAT_SYMMETRY_ETERNAL: 1766 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1767 break; 1768 default: 1769 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1770 } 1771 PetscFunctionReturn(0); 1772 } 1773 1774 #undef __FUNCT__ 1775 #define __FUNCT__ "MatGetRow_MPIAIJ" 1776 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1777 { 1778 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1779 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1780 PetscErrorCode ierr; 1781 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1782 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1783 PetscInt *cmap,*idx_p; 1784 1785 PetscFunctionBegin; 1786 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1787 mat->getrowactive = PETSC_TRUE; 1788 1789 if (!mat->rowvalues && (idx || v)) { 1790 /* 1791 allocate enough space to hold information from the longest row. 
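       The longest row is found in a single pass over the diagonal and off-diagonal blocks,
       taking the maximum combined row length.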
1792 */ 1793 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1794 PetscInt max = 1,tmp; 1795 for (i=0; i<matin->rmap->n; i++) { 1796 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1797 if (max < tmp) max = tmp; 1798 } 1799 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1800 } 1801 1802 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1803 lrow = row - rstart; 1804 1805 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1806 if (!v) {pvA = 0; pvB = 0;} 1807 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1808 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1809 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1810 nztot = nzA + nzB; 1811 1812 cmap = mat->garray; 1813 if (v || idx) { 1814 if (nztot) { 1815 /* Sort by increasing column numbers, assuming A and B already sorted */ 1816 PetscInt imark = -1; 1817 if (v) { 1818 *v = v_p = mat->rowvalues; 1819 for (i=0; i<nzB; i++) { 1820 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1821 else break; 1822 } 1823 imark = i; 1824 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1825 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1826 } 1827 if (idx) { 1828 *idx = idx_p = mat->rowindices; 1829 if (imark > -1) { 1830 for (i=0; i<imark; i++) { 1831 idx_p[i] = cmap[cworkB[i]]; 1832 } 1833 } else { 1834 for (i=0; i<nzB; i++) { 1835 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1836 else break; 1837 } 1838 imark = i; 1839 } 1840 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1841 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1842 } 1843 } else { 1844 if (idx) *idx = 0; 1845 if (v) *v = 0; 1846 } 1847 } 1848 *nz = nztot; 1849 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1850 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 #undef __FUNCT__ 1855 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1856 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1857 { 1858 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1859 1860 PetscFunctionBegin; 1861 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1862 aij->getrowactive = PETSC_FALSE; 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "MatNorm_MPIAIJ" 1868 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1869 { 1870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1871 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1872 PetscErrorCode ierr; 1873 PetscInt i,j,cstart = mat->cmap->rstart; 1874 PetscReal sum = 0.0; 1875 MatScalar *v; 1876 1877 PetscFunctionBegin; 1878 if (aij->size == 1) { 1879 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1880 } else { 1881 if (type == NORM_FROBENIUS) { 1882 v = amat->a; 1883 for (i=0; i<amat->nz; i++) { 1884 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1885 } 1886 v = bmat->a; 1887 for (i=0; i<bmat->nz; i++) { 1888 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1889 } 1890 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1891 *norm = PetscSqrtReal(*norm); 1892 } else if (type == NORM_1) { /* max column norm */ 1893 PetscReal *tmp,*tmp2; 1894 PetscInt *jj,*garray = aij->garray; 1895 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1896 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1897 *norm = 0.0; 1898 v = amat->a; jj = amat->j; 1899 for (j=0; j<amat->nz; j++) { 1900 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1901 } 1902 v = bmat->a; jj = bmat->j; 1903 for (j=0; j<bmat->nz; j++) { 1904 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1905 } 1906 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1907 for (j=0; j<mat->cmap->N; j++) { 1908 if (tmp2[j] > *norm) *norm = tmp2[j]; 1909 } 1910 ierr = PetscFree(tmp);CHKERRQ(ierr); 1911 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1912 } else if (type == NORM_INFINITY) { /* max row norm */ 1913 PetscReal ntemp = 0.0; 1914 for (j=0; j<aij->A->rmap->n; j++) { 1915 v = amat->a + amat->i[j]; 1916 sum = 0.0; 1917 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1918 sum += PetscAbsScalar(*v); v++; 1919 } 1920 v = bmat->a + bmat->i[j]; 1921 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1922 sum += PetscAbsScalar(*v); v++; 1923 } 1924 if (sum > ntemp) ntemp = sum; 1925 } 1926 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1927 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1928 } 1929 PetscFunctionReturn(0); 1930 } 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatTranspose_MPIAIJ" 1934 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1935 { 1936 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1937 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1938 PetscErrorCode ierr; 1939 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1940 PetscInt cstart = A->cmap->rstart,ncol; 1941 Mat B; 1942 MatScalar *array; 1943 1944 PetscFunctionBegin; 1945 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1946 1947 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1948 ai = Aloc->i; aj = Aloc->j; 1949 bi = Bloc->i; bj = Bloc->j; 1950 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1951 PetscInt *d_nnz,*g_nnz,*o_nnz; 1952 PetscSFNode *oloc; 1953 PETSC_UNUSED PetscSF sf; 1954 1955 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1956 /* compute d_nnz for preallocation */ 1957 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1958 for (i=0; i<ai[ma]; i++) { 1959 d_nnz[aj[i]]++; 1960 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1961 } 1962 /* compute local off-diagonal contributions */ 1963 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1964 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1965 /* map those to global */ 1966 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1967 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1968 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1969 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1970 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1971 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1972 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1973 1974 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1975 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1976 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1977 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1978 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1979 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1980 } else { 1981 B = *matout; 1982 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1983 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1984 } 1985 1986 /* copy over the A part */ 1987 array = Aloc->a; 1988 row = A->rmap->rstart; 1989 for (i=0; i<ma; i++) { 1990 ncol = ai[i+1]-ai[i]; 1991 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1992 row++; 1993 array += ncol; aj += ncol; 1994 } 1995 aj = Aloc->j; 1996 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1997 1998 /* copy over the B part */ 1999 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2000 array = Bloc->a; 2001 row = A->rmap->rstart; 2002 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2003 cols_tmp = cols; 2004 for (i=0; i<mb; i++) { 2005 ncol = bi[i+1]-bi[i]; 2006 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2007 row++; 2008 array += ncol; cols_tmp += ncol; 2009 } 2010 ierr = PetscFree(cols);CHKERRQ(ierr); 2011 2012 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2013 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2014 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2015 *matout = B; 2016 } else { 2017 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2018 } 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2024 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2025 { 2026 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2027 Mat a = aij->A,b = aij->B; 2028 PetscErrorCode ierr; 2029 PetscInt s1,s2,s3; 2030 2031 PetscFunctionBegin; 2032 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2033 if (rr) { 2034 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2035 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2036 /* Overlap communication with computation. 
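       The scatter of rr into lvec is begun here and only completed after the left scaling of the
       off-diagonal block and the scaling of the diagonal block, so the communication is hidden
       behind that local work.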
*/ 2037 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2038 } 2039 if (ll) { 2040 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2041 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2042 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2043 } 2044 /* scale the diagonal block */ 2045 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2046 2047 if (rr) { 2048 /* Do a scatter end and then right scale the off-diagonal block */ 2049 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2050 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2051 } 2052 PetscFunctionReturn(0); 2053 } 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2057 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2058 { 2059 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2060 PetscErrorCode ierr; 2061 2062 PetscFunctionBegin; 2063 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatEqual_MPIAIJ" 2069 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2070 { 2071 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2072 Mat a,b,c,d; 2073 PetscBool flg; 2074 PetscErrorCode ierr; 2075 2076 PetscFunctionBegin; 2077 a = matA->A; b = matA->B; 2078 c = matB->A; d = matB->B; 2079 2080 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2081 if (flg) { 2082 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2083 } 2084 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatCopy_MPIAIJ" 2090 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2091 { 2092 PetscErrorCode ierr; 2093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2094 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2095 2096 PetscFunctionBegin; 2097 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2098 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2099 /* because of the column compression in the off-processor part of the matrix a->B, 2100 the number of columns in a->B and b->B may be different, hence we cannot call 2101 the MatCopy() directly on the two parts. If need be, we can provide a more 2102 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2103 then copying the submatrices */ 2104 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2105 } else { 2106 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2107 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2108 } 2109 PetscFunctionReturn(0); 2110 } 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatSetUp_MPIAIJ" 2114 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2115 { 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 /* 2124 Computes the number of nonzeros per row needed for preallocation when X and Y 2125 have different nonzero structure. 
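   The sorted column lists of each row are merged in the global numbering given by xltog and
   yltog; a column present in both X and Y is counted only once. For example, columns {0,3,5}
   and {3,4} merge to 4 nonzeros.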
2126 */ 2127 #undef __FUNCT__ 2128 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2129 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2130 { 2131 PetscInt i,j,k,nzx,nzy; 2132 2133 PetscFunctionBegin; 2134 /* Set the number of nonzeros in the new matrix */ 2135 for (i=0; i<m; i++) { 2136 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2137 nzx = xi[i+1] - xi[i]; 2138 nzy = yi[i+1] - yi[i]; 2139 nnz[i] = 0; 2140 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2141 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2142 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2143 nnz[i]++; 2144 } 2145 for (; k<nzy; k++) nnz[i]++; 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2153 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2154 { 2155 PetscErrorCode ierr; 2156 PetscInt m = Y->rmap->N; 2157 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2158 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2159 2160 PetscFunctionBegin; 2161 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2162 PetscFunctionReturn(0); 2163 } 2164 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "MatAXPY_MPIAIJ" 2167 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2168 { 2169 PetscErrorCode ierr; 2170 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2171 PetscBLASInt bnz,one=1; 2172 Mat_SeqAIJ *x,*y; 2173 2174 PetscFunctionBegin; 2175 if (str == SAME_NONZERO_PATTERN) { 2176 PetscScalar alpha = a; 2177 x = (Mat_SeqAIJ*)xx->A->data; 2178 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2179 y = (Mat_SeqAIJ*)yy->A->data; 2180 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2181 x = (Mat_SeqAIJ*)xx->B->data; 2182 y = (Mat_SeqAIJ*)yy->B->data; 2183 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2184 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2185 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2186 } else if (str == SUBSET_NONZERO_PATTERN) { 2187 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2188 } else { 2189 Mat B; 2190 PetscInt *nnz_d,*nnz_o; 2191 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2192 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2193 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2194 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2195 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2196 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2197 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2198 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2199 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2200 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2201 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2202 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2203 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2204 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2205 } 2206 PetscFunctionReturn(0); 2207 } 2208 2209 extern 
PetscErrorCode MatConjugate_SeqAIJ(Mat); 2210 2211 #undef __FUNCT__ 2212 #define __FUNCT__ "MatConjugate_MPIAIJ" 2213 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2214 { 2215 #if defined(PETSC_USE_COMPLEX) 2216 PetscErrorCode ierr; 2217 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2218 2219 PetscFunctionBegin; 2220 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2221 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2222 #else 2223 PetscFunctionBegin; 2224 #endif 2225 PetscFunctionReturn(0); 2226 } 2227 2228 #undef __FUNCT__ 2229 #define __FUNCT__ "MatRealPart_MPIAIJ" 2230 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2231 { 2232 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2233 PetscErrorCode ierr; 2234 2235 PetscFunctionBegin; 2236 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2237 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2238 PetscFunctionReturn(0); 2239 } 2240 2241 #undef __FUNCT__ 2242 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2243 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2244 { 2245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2246 PetscErrorCode ierr; 2247 2248 PetscFunctionBegin; 2249 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2250 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2251 PetscFunctionReturn(0); 2252 } 2253 2254 #if defined(PETSC_HAVE_PBGL) 2255 2256 #include <boost/parallel/mpi/bsp_process_group.hpp> 2257 #include <boost/graph/distributed/ilu_default_graph.hpp> 2258 #include <boost/graph/distributed/ilu_0_block.hpp> 2259 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2260 #include <boost/graph/distributed/petsc/interface.hpp> 2261 #include <boost/multi_array.hpp> 2262 #include <boost/parallel/distributed_property_map->hpp> 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2266 /* 2267 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2268 */ 2269 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2270 { 2271 namespace petsc = boost::distributed::petsc; 2272 2273 namespace graph_dist = boost::graph::distributed; 2274 using boost::graph::distributed::ilu_default::process_group_type; 2275 using boost::graph::ilu_permuted; 2276 2277 PetscBool row_identity, col_identity; 2278 PetscContainer c; 2279 PetscInt m, n, M, N; 2280 PetscErrorCode ierr; 2281 2282 PetscFunctionBegin; 2283 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2284 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2285 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2286 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2287 2288 process_group_type pg; 2289 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2290 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2291 lgraph_type& level_graph = *lgraph_p; 2292 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2293 2294 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2295 ilu_permuted(level_graph); 2296 2297 /* put together the new matrix */ 2298 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2299 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2300 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2301 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2302 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2303 ierr = MatSetType(fact, 
((PetscObject)A)->type_name);CHKERRQ(ierr); 2304 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2306 2307 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2308 ierr = PetscContainerSetPointer(c, lgraph_p); 2309 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2310 ierr = PetscContainerDestroy(&c); 2311 PetscFunctionReturn(0); 2312 } 2313 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2316 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2317 { 2318 PetscFunctionBegin; 2319 PetscFunctionReturn(0); 2320 } 2321 2322 #undef __FUNCT__ 2323 #define __FUNCT__ "MatSolve_MPIAIJ" 2324 /* 2325 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2326 */ 2327 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2328 { 2329 namespace graph_dist = boost::graph::distributed; 2330 2331 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2332 lgraph_type *lgraph_p; 2333 PetscContainer c; 2334 PetscErrorCode ierr; 2335 2336 PetscFunctionBegin; 2337 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2338 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2339 ierr = VecCopy(b, x);CHKERRQ(ierr); 2340 2341 PetscScalar *array_x; 2342 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2343 PetscInt sx; 2344 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2345 2346 PetscScalar *array_b; 2347 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2348 PetscInt sb; 2349 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2350 2351 lgraph_type& level_graph = *lgraph_p; 2352 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2353 2354 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2355 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2356 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2357 2358 typedef boost::iterator_property_map<array_ref_type::iterator, 2359 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2360 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2361 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2362 2363 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2364 PetscFunctionReturn(0); 2365 } 2366 #endif 2367 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2371 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2372 { 2373 PetscMPIInt rank,size; 2374 MPI_Comm comm; 2375 PetscErrorCode ierr; 2376 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2377 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2378 PetscInt *rowrange = mat->rmap->range; 2379 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2380 Mat A = aij->A,B=aij->B,C=*matredundant; 2381 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2382 PetscScalar *sbuf_a; 2383 PetscInt nzlocal=a->nz+b->nz; 2384 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2385 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2386 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2387 MatScalar *aworkA,*aworkB; 2388 PetscScalar *vals; 2389 PetscMPIInt tag1,tag2,tag3,imdex; 2390 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2391 
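  /* request arrays for the three message rounds: nonzero counts (tag1), column indices (tag2) and matrix values (tag3) */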
MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2392 MPI_Status recv_status,*send_status; 2393 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2394 PetscInt **rbuf_j=NULL; 2395 PetscScalar **rbuf_a=NULL; 2396 Mat_Redundant *redund =NULL; 2397 2398 PetscFunctionBegin; 2399 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2400 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2401 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2402 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2403 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2404 2405 if (reuse == MAT_REUSE_MATRIX) { 2406 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2407 if (subsize == 1) { 2408 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2409 redund = c->redundant; 2410 } else { 2411 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2412 redund = c->redundant; 2413 } 2414 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2415 2416 nsends = redund->nsends; 2417 nrecvs = redund->nrecvs; 2418 send_rank = redund->send_rank; 2419 recv_rank = redund->recv_rank; 2420 sbuf_nz = redund->sbuf_nz; 2421 rbuf_nz = redund->rbuf_nz; 2422 sbuf_j = redund->sbuf_j; 2423 sbuf_a = redund->sbuf_a; 2424 rbuf_j = redund->rbuf_j; 2425 rbuf_a = redund->rbuf_a; 2426 } 2427 2428 if (reuse == MAT_INITIAL_MATRIX) { 2429 PetscInt nleftover,np_subcomm; 2430 2431 /* get the destination processors' id send_rank, nsends and nrecvs */ 2432 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2433 2434 np_subcomm = size/nsubcomm; 2435 nleftover = size - nsubcomm*np_subcomm; 2436 2437 /* block of codes below is specific for INTERLACED */ 2438 /* ------------------------------------------------*/ 2439 nsends = 0; nrecvs = 0; 2440 for (i=0; i<size; i++) { 2441 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2442 send_rank[nsends++] = i; 2443 recv_rank[nrecvs++] = i; 2444 } 2445 } 2446 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2447 i = size-nleftover-1; 2448 j = 0; 2449 while (j < nsubcomm - nleftover) { 2450 send_rank[nsends++] = i; 2451 i--; j++; 2452 } 2453 } 2454 2455 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2456 for (i=0; i<nleftover; i++) { 2457 recv_rank[nrecvs++] = size-nleftover+i; 2458 } 2459 } 2460 /*----------------------------------------------*/ 2461 2462 /* allocate sbuf_j, sbuf_a */ 2463 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2464 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2465 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2466 /* 2467 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2468 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2469 */ 2470 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2471 2472 /* copy mat's local entries into the buffers */ 2473 if (reuse == MAT_INITIAL_MATRIX) { 2474 rownz_max = 0; 2475 rptr = sbuf_j; 2476 cols = sbuf_j + rend-rstart + 1; 2477 vals = sbuf_a; 2478 rptr[0] = 0; 2479 for (i=0; i<rend-rstart; i++) { 2480 row = i + rstart; 2481 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2482 ncols = nzA + nzB; 2483 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2484 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2485 /* load the column indices for this row into cols */ 2486 lwrite = 0; 2487 for (l=0; l<nzB; l++) { 2488 
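        /* first pass: off-diagonal entries whose global column lies below cstart, so that the
           packed row (off-diagonal, then diagonal block, then remaining off-diagonal entries)
           stays sorted by global column */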
if ((ctmp = bmap[cworkB[l]]) < cstart) { 2489 vals[lwrite] = aworkB[l]; 2490 cols[lwrite++] = ctmp; 2491 } 2492 } 2493 for (l=0; l<nzA; l++) { 2494 vals[lwrite] = aworkA[l]; 2495 cols[lwrite++] = cstart + cworkA[l]; 2496 } 2497 for (l=0; l<nzB; l++) { 2498 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2499 vals[lwrite] = aworkB[l]; 2500 cols[lwrite++] = ctmp; 2501 } 2502 } 2503 vals += ncols; 2504 cols += ncols; 2505 rptr[i+1] = rptr[i] + ncols; 2506 if (rownz_max < ncols) rownz_max = ncols; 2507 } 2508 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2509 } else { /* only copy matrix values into sbuf_a */ 2510 rptr = sbuf_j; 2511 vals = sbuf_a; 2512 rptr[0] = 0; 2513 for (i=0; i<rend-rstart; i++) { 2514 row = i + rstart; 2515 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2516 ncols = nzA + nzB; 2517 cworkB = b->j + b->i[i]; 2518 aworkA = a->a + a->i[i]; 2519 aworkB = b->a + b->i[i]; 2520 lwrite = 0; 2521 for (l=0; l<nzB; l++) { 2522 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2523 } 2524 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2525 for (l=0; l<nzB; l++) { 2526 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2527 } 2528 vals += ncols; 2529 rptr[i+1] = rptr[i] + ncols; 2530 } 2531 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2532 2533 /* send nzlocal to others, and recv other's nzlocal */ 2534 /*--------------------------------------------------*/ 2535 if (reuse == MAT_INITIAL_MATRIX) { 2536 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2537 2538 s_waits2 = s_waits3 + nsends; 2539 s_waits1 = s_waits2 + nsends; 2540 r_waits1 = s_waits1 + nsends; 2541 r_waits2 = r_waits1 + nrecvs; 2542 r_waits3 = r_waits2 + nrecvs; 2543 } else { 2544 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2545 2546 r_waits3 = s_waits3 + nsends; 2547 } 2548 2549 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2550 if (reuse == MAT_INITIAL_MATRIX) { 2551 /* get new tags to keep the communication clean */ 2552 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2553 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2554 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2555 2556 /* post receives of other's nzlocal */ 2557 for (i=0; i<nrecvs; i++) { 2558 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2559 } 2560 /* send nzlocal to others */ 2561 for (i=0; i<nsends; i++) { 2562 sbuf_nz[i] = nzlocal; 2563 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2564 } 2565 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2566 count = nrecvs; 2567 while (count) { 2568 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2569 2570 recv_rank[imdex] = recv_status.MPI_SOURCE; 2571 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2572 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2573 2574 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2575 2576 rbuf_nz[imdex] += i + 2; 2577 2578 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2579 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2580 count--; 2581 } 
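      /* every expected index message now has a posted receive with a buffer sized from the counts just exchanged */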
2582 /* wait on sends of nzlocal */ 2583 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2584 /* send mat->i,j to others, and recv from other's */ 2585 /*------------------------------------------------*/ 2586 for (i=0; i<nsends; i++) { 2587 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2588 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2589 } 2590 /* wait on receives of mat->i,j */ 2591 /*------------------------------*/ 2592 count = nrecvs; 2593 while (count) { 2594 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2595 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2596 count--; 2597 } 2598 /* wait on sends of mat->i,j */ 2599 /*---------------------------*/ 2600 if (nsends) { 2601 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2602 } 2603 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2604 2605 /* post receives, send and receive mat->a */ 2606 /*----------------------------------------*/ 2607 for (imdex=0; imdex<nrecvs; imdex++) { 2608 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2609 } 2610 for (i=0; i<nsends; i++) { 2611 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2612 } 2613 count = nrecvs; 2614 while (count) { 2615 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2616 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2617 count--; 2618 } 2619 if (nsends) { 2620 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2621 } 2622 2623 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2624 2625 /* create redundant matrix */ 2626 /*-------------------------*/ 2627 if (reuse == MAT_INITIAL_MATRIX) { 2628 const PetscInt *range; 2629 PetscInt rstart_sub,rend_sub,mloc_sub; 2630 2631 /* compute rownz_max for preallocation */ 2632 for (imdex=0; imdex<nrecvs; imdex++) { 2633 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2634 rptr = rbuf_j[imdex]; 2635 for (i=0; i<j; i++) { 2636 ncols = rptr[i+1] - rptr[i]; 2637 if (rownz_max < ncols) rownz_max = ncols; 2638 } 2639 } 2640 2641 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2642 2643 /* get local size of redundant matrix 2644 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
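       - each process of the subcommunicator takes the rows owned by nsubcomm consecutive processes of the original communicator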
*/ 2645 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2646 rstart_sub = range[nsubcomm*subrank]; 2647 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2648 rend_sub = range[nsubcomm*(subrank+1)]; 2649 } else { 2650 rend_sub = mat->rmap->N; 2651 } 2652 mloc_sub = rend_sub - rstart_sub; 2653 2654 if (M == N) { 2655 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2656 } else { /* non-square matrix */ 2657 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2658 } 2659 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2660 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2661 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2662 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2663 } else { 2664 C = *matredundant; 2665 } 2666 2667 /* insert local matrix entries */ 2668 rptr = sbuf_j; 2669 cols = sbuf_j + rend-rstart + 1; 2670 vals = sbuf_a; 2671 for (i=0; i<rend-rstart; i++) { 2672 row = i + rstart; 2673 ncols = rptr[i+1] - rptr[i]; 2674 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2675 vals += ncols; 2676 cols += ncols; 2677 } 2678 /* insert received matrix entries */ 2679 for (imdex=0; imdex<nrecvs; imdex++) { 2680 rstart = rowrange[recv_rank[imdex]]; 2681 rend = rowrange[recv_rank[imdex]+1]; 2682 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2683 rptr = rbuf_j[imdex]; 2684 cols = rbuf_j[imdex] + rend-rstart + 1; 2685 vals = rbuf_a[imdex]; 2686 for (i=0; i<rend-rstart; i++) { 2687 row = i + rstart; 2688 ncols = rptr[i+1] - rptr[i]; 2689 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2690 vals += ncols; 2691 cols += ncols; 2692 } 2693 } 2694 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2695 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2696 2697 if (reuse == MAT_INITIAL_MATRIX) { 2698 *matredundant = C; 2699 2700 /* create a supporting struct and attach it to C for reuse */ 2701 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2702 if (subsize == 1) { 2703 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2704 c->redundant = redund; 2705 } else { 2706 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2707 c->redundant = redund; 2708 } 2709 2710 redund->nzlocal = nzlocal; 2711 redund->nsends = nsends; 2712 redund->nrecvs = nrecvs; 2713 redund->send_rank = send_rank; 2714 redund->recv_rank = recv_rank; 2715 redund->sbuf_nz = sbuf_nz; 2716 redund->rbuf_nz = rbuf_nz; 2717 redund->sbuf_j = sbuf_j; 2718 redund->sbuf_a = sbuf_a; 2719 redund->rbuf_j = rbuf_j; 2720 redund->rbuf_a = rbuf_a; 2721 redund->psubcomm = NULL; 2722 } 2723 PetscFunctionReturn(0); 2724 } 2725 2726 #undef __FUNCT__ 2727 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2728 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2729 { 2730 PetscErrorCode ierr; 2731 MPI_Comm comm; 2732 PetscMPIInt size,subsize; 2733 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2734 Mat_Redundant *redund=NULL; 2735 PetscSubcomm psubcomm=NULL; 2736 MPI_Comm subcomm_in=subcomm; 2737 Mat *matseq; 2738 IS isrow,iscol; 2739 2740 PetscFunctionBegin; 2741 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2742 if (reuse == MAT_INITIAL_MATRIX) { 2743 /* create psubcomm, then get subcomm */ 2744 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2745 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2746 if (nsubcomm < 1 || nsubcomm 
> size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2747 2748 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2749 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2750 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2751 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2752 subcomm = psubcomm->comm; 2753 } else { /* retrieve psubcomm and subcomm */ 2754 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2755 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2756 if (subsize == 1) { 2757 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2758 redund = c->redundant; 2759 } else { 2760 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2761 redund = c->redundant; 2762 } 2763 psubcomm = redund->psubcomm; 2764 } 2765 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2766 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2767 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2768 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2769 if (subsize == 1) { 2770 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2771 c->redundant->psubcomm = psubcomm; 2772 } else { 2773 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2774 c->redundant->psubcomm = psubcomm ; 2775 } 2776 } 2777 PetscFunctionReturn(0); 2778 } 2779 } 2780 2781 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2782 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2783 if (reuse == MAT_INITIAL_MATRIX) { 2784 /* create a local sequential matrix matseq[0] */ 2785 mloc_sub = PETSC_DECIDE; 2786 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2787 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2788 rstart = rend - mloc_sub; 2789 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2790 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2791 } else { /* reuse == MAT_REUSE_MATRIX */ 2792 if (subsize == 1) { 2793 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2794 redund = c->redundant; 2795 } else { 2796 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2797 redund = c->redundant; 2798 } 2799 2800 isrow = redund->isrow; 2801 iscol = redund->iscol; 2802 matseq = redund->matseq; 2803 } 2804 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2805 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2806 2807 if (reuse == MAT_INITIAL_MATRIX) { 2808 /* create a supporting struct and attach it to C for reuse */ 2809 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2810 if (subsize == 1) { 2811 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2812 c->redundant = redund; 2813 } else { 2814 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2815 c->redundant = redund; 2816 } 2817 redund->isrow = isrow; 2818 redund->iscol = iscol; 2819 redund->matseq = matseq; 2820 redund->psubcomm = psubcomm; 2821 } 2822 PetscFunctionReturn(0); 2823 } 2824 2825 #undef __FUNCT__ 2826 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2827 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2828 { 2829 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2830 PetscErrorCode ierr; 2831 PetscInt i,*idxb = 0; 2832 PetscScalar *va,*vb; 2833 Vec vtmp; 2834 2835 
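  /*
     The row-wise maximum of |a_ij| is taken from the diagonal block first; the off-diagonal block
     is then scanned and the larger value wins, with its column mapped back to the global numbering
     through garray.

     Minimal usage sketch (an illustration only; assumes A is an assembled MATMPIAIJ matrix and
     m is its number of local rows):

        Vec      vmax;
        PetscInt *cols;
        ierr = MatGetVecs(A,NULL,&vmax);CHKERRQ(ierr);
        ierr = PetscMalloc1(m,&cols);CHKERRQ(ierr);
        ierr = MatGetRowMaxAbs(A,vmax,cols);CHKERRQ(ierr);
        ierr = PetscFree(cols);CHKERRQ(ierr);
        ierr = VecDestroy(&vmax);CHKERRQ(ierr);
  */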
PetscFunctionBegin; 2836 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2837 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2838 if (idx) { 2839 for (i=0; i<A->rmap->n; i++) { 2840 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2841 } 2842 } 2843 2844 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2845 if (idx) { 2846 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2847 } 2848 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2849 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2850 2851 for (i=0; i<A->rmap->n; i++) { 2852 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2853 va[i] = vb[i]; 2854 if (idx) idx[i] = a->garray[idxb[i]]; 2855 } 2856 } 2857 2858 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2859 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2860 ierr = PetscFree(idxb);CHKERRQ(ierr); 2861 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2862 PetscFunctionReturn(0); 2863 } 2864 2865 #undef __FUNCT__ 2866 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2867 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2868 { 2869 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2870 PetscErrorCode ierr; 2871 PetscInt i,*idxb = 0; 2872 PetscScalar *va,*vb; 2873 Vec vtmp; 2874 2875 PetscFunctionBegin; 2876 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2877 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2878 if (idx) { 2879 for (i=0; i<A->rmap->n; i++) { 2880 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2881 } 2882 } 2883 2884 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2885 if (idx) { 2886 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2887 } 2888 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2889 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2890 2891 for (i=0; i<A->rmap->n; i++) { 2892 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2893 va[i] = vb[i]; 2894 if (idx) idx[i] = a->garray[idxb[i]]; 2895 } 2896 } 2897 2898 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2899 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2900 ierr = PetscFree(idxb);CHKERRQ(ierr); 2901 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2902 PetscFunctionReturn(0); 2903 } 2904 2905 #undef __FUNCT__ 2906 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2907 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2908 { 2909 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2910 PetscInt n = A->rmap->n; 2911 PetscInt cstart = A->cmap->rstart; 2912 PetscInt *cmap = mat->garray; 2913 PetscInt *diagIdx, *offdiagIdx; 2914 Vec diagV, offdiagV; 2915 PetscScalar *a, *diagA, *offdiagA; 2916 PetscInt r; 2917 PetscErrorCode ierr; 2918 2919 PetscFunctionBegin; 2920 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2921 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2922 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2923 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2924 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2925 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2926 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2927 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2928 for (r = 0; r < n; ++r) { 2929 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2930 a[r] = diagA[r]; 2931 idx[r] = cstart + diagIdx[r]; 2932 } else { 2933 a[r] = offdiagA[r]; 2934 idx[r] = cmap[offdiagIdx[r]]; 2935 } 2936 } 2937 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2938 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2939 ierr = VecRestoreArray(offdiagV,
&offdiagA);CHKERRQ(ierr); 2940 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2941 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2942 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2943 PetscFunctionReturn(0); 2944 } 2945 2946 #undef __FUNCT__ 2947 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2948 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2949 { 2950 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2951 PetscInt n = A->rmap->n; 2952 PetscInt cstart = A->cmap->rstart; 2953 PetscInt *cmap = mat->garray; 2954 PetscInt *diagIdx, *offdiagIdx; 2955 Vec diagV, offdiagV; 2956 PetscScalar *a, *diagA, *offdiagA; 2957 PetscInt r; 2958 PetscErrorCode ierr; 2959 2960 PetscFunctionBegin; 2961 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2962 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2963 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2964 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2965 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2966 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2967 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2968 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2969 for (r = 0; r < n; ++r) { 2970 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2971 a[r] = diagA[r]; 2972 idx[r] = cstart + diagIdx[r]; 2973 } else { 2974 a[r] = offdiagA[r]; 2975 idx[r] = cmap[offdiagIdx[r]]; 2976 } 2977 } 2978 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2979 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2980 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2981 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2982 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2983 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2984 PetscFunctionReturn(0); 2985 } 2986 2987 #undef __FUNCT__ 2988 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2989 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2990 { 2991 PetscErrorCode ierr; 2992 Mat *dummy; 2993 2994 PetscFunctionBegin; 2995 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2996 *newmat = *dummy; 2997 ierr = PetscFree(dummy);CHKERRQ(ierr); 2998 PetscFunctionReturn(0); 2999 } 3000 3001 #undef __FUNCT__ 3002 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3003 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3004 { 3005 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3006 PetscErrorCode ierr; 3007 3008 PetscFunctionBegin; 3009 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3010 PetscFunctionReturn(0); 3011 } 3012 3013 #undef __FUNCT__ 3014 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3015 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3016 { 3017 PetscErrorCode ierr; 3018 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3019 3020 PetscFunctionBegin; 3021 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3022 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3023 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3024 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 /* -------------------------------------------------------------------*/ 3029 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3030 MatGetRow_MPIAIJ, 3031 MatRestoreRow_MPIAIJ, 3032 MatMult_MPIAIJ, 3033 /* 4*/ MatMultAdd_MPIAIJ, 3034 MatMultTranspose_MPIAIJ, 3035 MatMultTransposeAdd_MPIAIJ, 3036 #if defined(PETSC_HAVE_PBGL) 3037 MatSolve_MPIAIJ, 3038 #else 3039 0, 3040 #endif 3041 0, 3042 
0, 3043 /*10*/ 0, 3044 0, 3045 0, 3046 MatSOR_MPIAIJ, 3047 MatTranspose_MPIAIJ, 3048 /*15*/ MatGetInfo_MPIAIJ, 3049 MatEqual_MPIAIJ, 3050 MatGetDiagonal_MPIAIJ, 3051 MatDiagonalScale_MPIAIJ, 3052 MatNorm_MPIAIJ, 3053 /*20*/ MatAssemblyBegin_MPIAIJ, 3054 MatAssemblyEnd_MPIAIJ, 3055 MatSetOption_MPIAIJ, 3056 MatZeroEntries_MPIAIJ, 3057 /*24*/ MatZeroRows_MPIAIJ, 3058 0, 3059 #if defined(PETSC_HAVE_PBGL) 3060 0, 3061 #else 3062 0, 3063 #endif 3064 0, 3065 0, 3066 /*29*/ MatSetUp_MPIAIJ, 3067 #if defined(PETSC_HAVE_PBGL) 3068 0, 3069 #else 3070 0, 3071 #endif 3072 0, 3073 0, 3074 0, 3075 /*34*/ MatDuplicate_MPIAIJ, 3076 0, 3077 0, 3078 0, 3079 0, 3080 /*39*/ MatAXPY_MPIAIJ, 3081 MatGetSubMatrices_MPIAIJ, 3082 MatIncreaseOverlap_MPIAIJ, 3083 MatGetValues_MPIAIJ, 3084 MatCopy_MPIAIJ, 3085 /*44*/ MatGetRowMax_MPIAIJ, 3086 MatScale_MPIAIJ, 3087 0, 3088 0, 3089 MatZeroRowsColumns_MPIAIJ, 3090 /*49*/ MatSetRandom_MPIAIJ, 3091 0, 3092 0, 3093 0, 3094 0, 3095 /*54*/ MatFDColoringCreate_MPIXAIJ, 3096 0, 3097 MatSetUnfactored_MPIAIJ, 3098 MatPermute_MPIAIJ, 3099 0, 3100 /*59*/ MatGetSubMatrix_MPIAIJ, 3101 MatDestroy_MPIAIJ, 3102 MatView_MPIAIJ, 3103 0, 3104 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3105 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3106 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3107 0, 3108 0, 3109 0, 3110 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3111 MatGetRowMinAbs_MPIAIJ, 3112 0, 3113 MatSetColoring_MPIAIJ, 3114 0, 3115 MatSetValuesAdifor_MPIAIJ, 3116 /*75*/ MatFDColoringApply_AIJ, 3117 0, 3118 0, 3119 0, 3120 MatFindZeroDiagonals_MPIAIJ, 3121 /*80*/ 0, 3122 0, 3123 0, 3124 /*83*/ MatLoad_MPIAIJ, 3125 0, 3126 0, 3127 0, 3128 0, 3129 0, 3130 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3131 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3132 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3133 MatPtAP_MPIAIJ_MPIAIJ, 3134 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3135 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3136 0, 3137 0, 3138 0, 3139 0, 3140 /*99*/ 0, 3141 0, 3142 0, 3143 MatConjugate_MPIAIJ, 3144 0, 3145 /*104*/MatSetValuesRow_MPIAIJ, 3146 MatRealPart_MPIAIJ, 3147 MatImaginaryPart_MPIAIJ, 3148 0, 3149 0, 3150 /*109*/0, 3151 MatGetRedundantMatrix_MPIAIJ, 3152 MatGetRowMin_MPIAIJ, 3153 0, 3154 0, 3155 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3156 0, 3157 0, 3158 0, 3159 0, 3160 /*119*/0, 3161 0, 3162 0, 3163 0, 3164 MatGetMultiProcBlock_MPIAIJ, 3165 /*124*/MatFindNonzeroRows_MPIAIJ, 3166 MatGetColumnNorms_MPIAIJ, 3167 MatInvertBlockDiagonal_MPIAIJ, 3168 0, 3169 MatGetSubMatricesParallel_MPIAIJ, 3170 /*129*/0, 3171 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3172 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3173 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3174 0, 3175 /*134*/0, 3176 0, 3177 0, 3178 0, 3179 0, 3180 /*139*/0, 3181 0, 3182 0, 3183 MatFDColoringSetUp_MPIXAIJ 3184 }; 3185 3186 /* ----------------------------------------------------------------------------------------*/ 3187 3188 #undef __FUNCT__ 3189 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3190 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3191 { 3192 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3193 PetscErrorCode ierr; 3194 3195 PetscFunctionBegin; 3196 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3197 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3198 PetscFunctionReturn(0); 3199 } 3200 3201 #undef __FUNCT__ 3202 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3203 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3204 { 3205 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3206 PetscErrorCode ierr; 3207 3208 PetscFunctionBegin; 3209 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3210 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 3211 PetscFunctionReturn(0); 3212 } 3213 3214 #undef __FUNCT__ 3215 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3216 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3217 { 3218 Mat_MPIAIJ *b; 3219 PetscErrorCode ierr; 3220 3221 PetscFunctionBegin; 3222 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3223 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3224 b = (Mat_MPIAIJ*)B->data; 3225 3226 if (!B->preallocated) { 3227 /* Explicitly create 2 MATSEQAIJ matrices. */ 3228 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3229 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3230 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3231 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3232 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3233 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3234 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3235 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3236 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3237 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3238 } 3239 3240 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3241 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3242 B->preallocated = PETSC_TRUE; 3243 PetscFunctionReturn(0); 3244 } 3245 3246 #undef __FUNCT__ 3247 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3248 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3249 { 3250 Mat mat; 3251 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3252 PetscErrorCode ierr; 3253 3254 PetscFunctionBegin; 3255 *newmat = 0; 3256 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3257 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3258 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3259 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3260 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3261 a = (Mat_MPIAIJ*)mat->data; 3262 3263 mat->factortype = matin->factortype; 3264 mat->assembled = PETSC_TRUE; 3265 mat->insertmode = NOT_SET_VALUES; 3266 mat->preallocated = PETSC_TRUE; 3267 3268 a->size = oldmat->size; 3269 a->rank = oldmat->rank; 3270 a->donotstash = oldmat->donotstash; 3271 a->roworiented = oldmat->roworiented; 3272 a->rowindices = 0; 3273 a->rowvalues = 0; 3274 a->getrowactive = PETSC_FALSE; 3275 3276 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3277 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3278 3279 if (oldmat->colmap) { 3280 #if defined(PETSC_USE_CTABLE) 3281 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3282 #else 3283 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3284 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3285 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3286 #endif 3287 } else a->colmap = 0; 3288 if (oldmat->garray) { 3289 PetscInt len; 3290 len = oldmat->B->cmap->n; 3291 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3292 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3293 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3294 } else a->garray = 0; 3295 3296 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3297 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3298 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3299 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3300 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3301 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3302 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3303 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3304 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3305 *newmat = mat; 3306 PetscFunctionReturn(0); 3307 } 3308 3309 3310 3311 #undef __FUNCT__ 3312 #define __FUNCT__ "MatLoad_MPIAIJ" 3313 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3314 { 3315 PetscScalar *vals,*svals; 3316 MPI_Comm comm; 3317 PetscErrorCode ierr; 3318 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3319 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3320 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3321 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3322 PetscInt cend,cstart,n,*rowners,sizesset=1; 3323 int fd; 3324 PetscInt bs = newMat->rmap->bs; 3325 3326 PetscFunctionBegin; 3327 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3328 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3329 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3330 if (!rank) { 3331 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3332 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3333 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3334 } 3335 3336 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3337 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3338 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3339 if (bs < 0) bs = 1; 3340 3341 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3342 3343 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3344 M = header[1]; N = header[2]; 3345 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3346 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3347 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3348 3349 /* If global sizes are set, check if they are consistent with that given in the file */ 3350 if (sizesset) { 3351 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3352 } 3353 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3354 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3355 3356 /* determine ownership of all (block) rows */ 3357 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3358 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3359 else m = 
newMat->rmap->n; /* Set by user */ 3360 3361 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3362 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3363 3364 /* First process needs enough room for process with most rows */ 3365 if (!rank) { 3366 mmax = rowners[1]; 3367 for (i=2; i<=size; i++) { 3368 mmax = PetscMax(mmax, rowners[i]); 3369 } 3370 } else mmax = -1; /* unused, but compilers complain */ 3371 3372 rowners[0] = 0; 3373 for (i=2; i<=size; i++) { 3374 rowners[i] += rowners[i-1]; 3375 } 3376 rstart = rowners[rank]; 3377 rend = rowners[rank+1]; 3378 3379 /* distribute row lengths to all processors */ 3380 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3381 if (!rank) { 3382 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3383 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3384 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3385 for (j=0; j<m; j++) { 3386 procsnz[0] += ourlens[j]; 3387 } 3388 for (i=1; i<size; i++) { 3389 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3390 /* calculate the number of nonzeros on each processor */ 3391 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3392 procsnz[i] += rowlengths[j]; 3393 } 3394 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3395 } 3396 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3397 } else { 3398 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3399 } 3400 3401 if (!rank) { 3402 /* determine max buffer needed and allocate it */ 3403 maxnz = 0; 3404 for (i=0; i<size; i++) { 3405 maxnz = PetscMax(maxnz,procsnz[i]); 3406 } 3407 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3408 3409 /* read in my part of the matrix column indices */ 3410 nz = procsnz[0]; 3411 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3412 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3413 3414 /* read in every one elses and ship off */ 3415 for (i=1; i<size; i++) { 3416 nz = procsnz[i]; 3417 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3418 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3419 } 3420 ierr = PetscFree(cols);CHKERRQ(ierr); 3421 } else { 3422 /* determine buffer space needed for message */ 3423 nz = 0; 3424 for (i=0; i<m; i++) { 3425 nz += ourlens[i]; 3426 } 3427 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3428 3429 /* receive message of column indices*/ 3430 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3431 } 3432 3433 /* determine column ownership if matrix is not square */ 3434 if (N != M) { 3435 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3436 else n = newMat->cmap->n; 3437 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3438 cstart = cend - n; 3439 } else { 3440 cstart = rstart; 3441 cend = rend; 3442 n = cend - cstart; 3443 } 3444 3445 /* loop over local rows, determining number of off diagonal entries */ 3446 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3447 jj = 0; 3448 for (i=0; i<m; i++) { 3449 for (j=0; j<ourlens[i]; j++) { 3450 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3451 jj++; 3452 } 3453 } 3454 3455 for (i=0; i<m; i++) { 3456 ourlens[i] -= offlens[i]; 3457 } 3458 if (!sizesset) { 3459 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3460 } 3461 3462 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3463 3464 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3465 3466 for (i=0; i<m; i++) { 3467 
ourlens[i] += offlens[i]; 3468 } 3469 3470 if (!rank) { 3471 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3472 3473 /* read in my part of the matrix numerical values */ 3474 nz = procsnz[0]; 3475 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3476 3477 /* insert into matrix */ 3478 jj = rstart; 3479 smycols = mycols; 3480 svals = vals; 3481 for (i=0; i<m; i++) { 3482 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3483 smycols += ourlens[i]; 3484 svals += ourlens[i]; 3485 jj++; 3486 } 3487 3488 /* read in other processors and ship out */ 3489 for (i=1; i<size; i++) { 3490 nz = procsnz[i]; 3491 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3492 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3493 } 3494 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3495 } else { 3496 /* receive numeric values */ 3497 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3498 3499 /* receive message of values*/ 3500 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3501 3502 /* insert into matrix */ 3503 jj = rstart; 3504 smycols = mycols; 3505 svals = vals; 3506 for (i=0; i<m; i++) { 3507 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3508 smycols += ourlens[i]; 3509 svals += ourlens[i]; 3510 jj++; 3511 } 3512 } 3513 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3514 ierr = PetscFree(vals);CHKERRQ(ierr); 3515 ierr = PetscFree(mycols);CHKERRQ(ierr); 3516 ierr = PetscFree(rowners);CHKERRQ(ierr); 3517 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3518 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3519 PetscFunctionReturn(0); 3520 } 3521 3522 #undef __FUNCT__ 3523 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3524 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3525 { 3526 PetscErrorCode ierr; 3527 IS iscol_local; 3528 PetscInt csize; 3529 3530 PetscFunctionBegin; 3531 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3532 if (call == MAT_REUSE_MATRIX) { 3533 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3534 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3535 } else { 3536 PetscInt cbs; 3537 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3538 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3539 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3540 } 3541 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3542 if (call == MAT_INITIAL_MATRIX) { 3543 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3544 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3545 } 3546 PetscFunctionReturn(0); 3547 } 3548 3549 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3550 #undef __FUNCT__ 3551 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3552 /* 3553 Not great since it makes two copies of the submatrix, first an SeqAIJ 3554 in local and then by concatenating the local matrices the end result. 3555 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3556 3557 Note: This requires a sequential iscol with all indices. 
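   For reference, the public entry point MatGetSubMatrix_MPIAIJ() above is what builds that
   sequential iscol (via ISAllGather(), so every process holds all requested column indices)
   before calling this routine. A sketch of that step, with error checking elided, is

      IS       iscol_local;
      PetscInt cbs;
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
      ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);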
3558 */ 3559 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3560 { 3561 PetscErrorCode ierr; 3562 PetscMPIInt rank,size; 3563 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3564 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3565 PetscBool allcolumns, colflag; 3566 Mat M,Mreuse; 3567 MatScalar *vwork,*aa; 3568 MPI_Comm comm; 3569 Mat_SeqAIJ *aij; 3570 3571 PetscFunctionBegin; 3572 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3573 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3574 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3575 3576 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3577 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3578 if (colflag && ncol == mat->cmap->N) { 3579 allcolumns = PETSC_TRUE; 3580 } else { 3581 allcolumns = PETSC_FALSE; 3582 } 3583 if (call == MAT_REUSE_MATRIX) { 3584 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3585 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3586 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3587 } else { 3588 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3589 } 3590 3591 /* 3592 m - number of local rows 3593 n - number of columns (same on all processors) 3594 rstart - first row in new global matrix generated 3595 */ 3596 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3597 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3598 if (call == MAT_INITIAL_MATRIX) { 3599 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3600 ii = aij->i; 3601 jj = aij->j; 3602 3603 /* 3604 Determine the number of non-zeros in the diagonal and off-diagonal 3605 portions of the matrix in order to do correct preallocation 3606 */ 3607 3608 /* first get start and end of "diagonal" columns */ 3609 if (csize == PETSC_DECIDE) { 3610 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3611 if (mglobal == n) { /* square matrix */ 3612 nlocal = m; 3613 } else { 3614 nlocal = n/size + ((n % size) > rank); 3615 } 3616 } else { 3617 nlocal = csize; 3618 } 3619 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3620 rstart = rend - nlocal; 3621 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3622 3623 /* next, compute all the lengths */ 3624 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3625 olens = dlens + m; 3626 for (i=0; i<m; i++) { 3627 jend = ii[i+1] - ii[i]; 3628 olen = 0; 3629 dlen = 0; 3630 for (j=0; j<jend; j++) { 3631 if (*jj < rstart || *jj >= rend) olen++; 3632 else dlen++; 3633 jj++; 3634 } 3635 olens[i] = olen; 3636 dlens[i] = dlen; 3637 } 3638 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3639 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3640 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3641 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3642 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3643 ierr = PetscFree(dlens);CHKERRQ(ierr); 3644 } else { 3645 PetscInt ml,nl; 3646 3647 M = *newmat; 3648 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3649 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3650 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3651 /* 3652 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3653 rather than the slower MatSetValues(). 3654 */ 3655 M->was_assembled = PETSC_TRUE; 3656 M->assembled = PETSC_FALSE; 3657 } 3658 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3659 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3660 ii = aij->i; 3661 jj = aij->j; 3662 aa = aij->a; 3663 for (i=0; i<m; i++) { 3664 row = rstart + i; 3665 nz = ii[i+1] - ii[i]; 3666 cwork = jj; jj += nz; 3667 vwork = aa; aa += nz; 3668 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3669 } 3670 3671 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3672 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3673 *newmat = M; 3674 3675 /* save submatrix used in processor for next request */ 3676 if (call == MAT_INITIAL_MATRIX) { 3677 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3678 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3679 } 3680 PetscFunctionReturn(0); 3681 } 3682 3683 #undef __FUNCT__ 3684 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3685 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3686 { 3687 PetscInt m,cstart, cend,j,nnz,i,d; 3688 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3689 const PetscInt *JJ; 3690 PetscScalar *values; 3691 PetscErrorCode ierr; 3692 3693 PetscFunctionBegin; 3694 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3695 3696 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3697 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3698 m = B->rmap->n; 3699 cstart = B->cmap->rstart; 3700 cend = B->cmap->rend; 3701 rstart = B->rmap->rstart; 3702 3703 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3704 3705 #if defined(PETSC_USE_DEBUG) 3706 for (i=0; i<m; i++) { 3707 nnz = Ii[i+1]- Ii[i]; 3708 JJ = J + Ii[i]; 3709 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3710 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3711 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3712 } 3713 #endif 3714 3715 for (i=0; i<m; i++) { 3716 nnz = Ii[i+1]- Ii[i]; 3717 JJ = J + Ii[i]; 3718 nnz_max = PetscMax(nnz_max,nnz); 3719 d = 0; 3720 for (j=0; j<nnz; j++) { 3721 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3722 } 3723 d_nnz[i] = d; 3724 o_nnz[i] = nnz - d; 3725 } 3726 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3727 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3728 3729 if (v) values = (PetscScalar*)v; 3730 else { 3731 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3732 } 3733 3734 for (i=0; i<m; i++) { 3735 ii = i + rstart; 3736 nnz = Ii[i+1]- Ii[i]; 3737 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3738 } 3739 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3740 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3741 3742 if (!v) { 3743 ierr = PetscFree(values);CHKERRQ(ierr); 3744 } 3745 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3746 PetscFunctionReturn(0); 3747 } 3748 3749 #undef __FUNCT__ 3750 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3751 /*@ 3752 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3753 (the default parallel PETSc format). 3754 3755 Collective on MPI_Comm 3756 3757 Input Parameters: 3758 + B - the matrix 3759 . i - the indices into j for the start of each local row (starts with zero) 3760 . j - the column indices for each local row (starts with zero) 3761 - v - optional values in the matrix 3762 3763 Level: developer 3764 3765 Notes: 3766 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3767 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3768 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3769 3770 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3771 3772 The format which is used for the sparse matrix input, is equivalent to a 3773 row-major ordering.. i.e for the following matrix, the input data expected is 3774 as shown: 3775 3776 1 0 0 3777 2 0 3 P0 3778 ------- 3779 4 5 6 P1 3780 3781 Process0 [P0]: rows_owned=[0,1] 3782 i = {0,1,3} [size = nrow+1 = 2+1] 3783 j = {0,0,2} [size = nz = 6] 3784 v = {1,2,3} [size = nz = 6] 3785 3786 Process1 [P1]: rows_owned=[2] 3787 i = {0,3} [size = nrow+1 = 1+1] 3788 j = {0,1,2} [size = nz = 6] 3789 v = {4,5,6} [size = nz = 6] 3790 3791 .keywords: matrix, aij, compressed row, sparse, parallel 3792 3793 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3794 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3795 @*/ 3796 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3797 { 3798 PetscErrorCode ierr; 3799 3800 PetscFunctionBegin; 3801 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3802 PetscFunctionReturn(0); 3803 } 3804 3805 #undef __FUNCT__ 3806 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3807 /*@C 3808 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3809 (the default parallel PETSc format). For good matrix assembly performance 3810 the user should preallocate the matrix storage by setting the parameters 3811 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3812 performance can be increased by more than a factor of 50. 3813 3814 Collective on MPI_Comm 3815 3816 Input Parameters: 3817 + B - the matrix 3818 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3819 (same value is used for all local rows) 3820 . d_nnz - array containing the number of nonzeros in the various rows of the 3821 DIAGONAL portion of the local submatrix (possibly different for each row) 3822 or NULL, if d_nz is used to specify the nonzero structure. 3823 The size of this array is equal to the number of local rows, i.e 'm'. 
3824 For matrices that will be factored, you must leave room for (and set) 3825 the diagonal entry even if it is zero. 3826 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3827 submatrix (same value is used for all local rows). 3828 - o_nnz - array containing the number of nonzeros in the various rows of the 3829 OFF-DIAGONAL portion of the local submatrix (possibly different for 3830 each row) or NULL, if o_nz is used to specify the nonzero 3831 structure. The size of this array is equal to the number 3832 of local rows, i.e 'm'. 3833 3834 If the *_nnz parameter is given then the *_nz parameter is ignored 3835 3836 The AIJ format (also called the Yale sparse matrix format or 3837 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3838 storage. The stored row and column indices begin with zero. 3839 See Users-Manual: ch_mat for details. 3840 3841 The parallel matrix is partitioned such that the first m0 rows belong to 3842 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3843 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3844 3845 The DIAGONAL portion of the local submatrix of a processor can be defined 3846 as the submatrix which is obtained by extraction the part corresponding to 3847 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3848 first row that belongs to the processor, r2 is the last row belonging to 3849 the this processor, and c1-c2 is range of indices of the local part of a 3850 vector suitable for applying the matrix to. This is an mxn matrix. In the 3851 common case of a square matrix, the row and column ranges are the same and 3852 the DIAGONAL part is also square. The remaining portion of the local 3853 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3854 3855 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3856 3857 You can call MatGetInfo() to get information on how effective the preallocation was; 3858 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3859 You can also run with the option -info and look for messages with the string 3860 malloc in them to see if additional memory allocation was needed. 3861 3862 Example usage: 3863 3864 Consider the following 8x8 matrix with 34 non-zero values, that is 3865 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3866 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3867 as follows: 3868 3869 .vb 3870 1 2 0 | 0 3 0 | 0 4 3871 Proc0 0 5 6 | 7 0 0 | 8 0 3872 9 0 10 | 11 0 0 | 12 0 3873 ------------------------------------- 3874 13 0 14 | 15 16 17 | 0 0 3875 Proc1 0 18 0 | 19 20 21 | 0 0 3876 0 0 0 | 22 23 0 | 24 0 3877 ------------------------------------- 3878 Proc2 25 26 27 | 0 0 28 | 29 0 3879 30 0 0 | 31 32 33 | 0 34 3880 .ve 3881 3882 This can be represented as a collection of submatrices as: 3883 3884 .vb 3885 A B C 3886 D E F 3887 G H I 3888 .ve 3889 3890 Where the submatrices A,B,C are owned by proc0, D,E,F are 3891 owned by proc1, G,H,I are owned by proc2. 3892 3893 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3894 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3895 The 'M','N' parameters are 8,8, and have the same values on all procs. 3896 3897 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3898 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3899 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
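   As a usage sketch (the variable names mirror the parameters described above; error checking
   omitted), such a matrix is normally created with the sequence

.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A,m,n,M,N);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   using, for example, the per-row d_nnz/o_nnz values worked out further below.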
3900 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3901 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 3902 matrix, and [DF] as another SeqAIJ matrix. 3903 3904 When d_nz, o_nz parameters are specified, d_nz storage elements are 3905 allocated for every row of the local diagonal submatrix, and o_nz 3906 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3907 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3908 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3909 In this case, the values of d_nz,o_nz are: 3910 .vb 3911 proc0 : dnz = 2, o_nz = 2 3912 proc1 : dnz = 3, o_nz = 2 3913 proc2 : dnz = 1, o_nz = 4 3914 .ve 3915 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3916 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3917 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3918 34 values. 3919 3920 When d_nnz, o_nnz parameters are specified, the storage is specified 3921 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3922 In the above case the values for d_nnz,o_nnz are: 3923 .vb 3924 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3925 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3926 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3927 .ve 3928 Here the space allocated is the sum of all the above values, i.e., 34, and 3929 hence pre-allocation is perfect. 3930 3931 Level: intermediate 3932 3933 .keywords: matrix, aij, compressed row, sparse, parallel 3934 3935 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3936 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3937 @*/ 3938 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3939 { 3940 PetscErrorCode ierr; 3941 3942 PetscFunctionBegin; 3943 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3944 PetscValidType(B,1); 3945 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3946 PetscFunctionReturn(0); 3947 } 3948 3949 #undef __FUNCT__ 3950 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3951 /*@ 3952 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 3953 in standard CSR format. 3954 3955 Collective on MPI_Comm 3956 3957 Input Parameters: 3958 + comm - MPI communicator 3959 . m - number of local rows (Cannot be PETSC_DECIDE) 3960 . n - This value should be the same as the local size used in creating the 3961 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3962 calculated if N is given) For square matrices n is almost always m. 3963 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3964 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3965 . i - row indices 3966 . j - column indices 3967 - a - matrix values 3968 3969 Output Parameter: 3970 . mat - the matrix 3971 3972 Level: intermediate 3973 3974 Notes: 3975 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3976 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3977 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3978 3979 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
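   A minimal calling sketch for the two-process example shown further below (the arrays are what
   process 0 would pass; process 1 passes its own i,j,v with m=1; error checking omitted):

.vb
   PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
   PetscScalar v[] = {1,2,3};
   Mat         A;
   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve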
3980 3981 The format which is used for the sparse matrix input, is equivalent to a 3982 row-major ordering.. i.e for the following matrix, the input data expected is 3983 as shown: 3984 3985 1 0 0 3986 2 0 3 P0 3987 ------- 3988 4 5 6 P1 3989 3990 Process0 [P0]: rows_owned=[0,1] 3991 i = {0,1,3} [size = nrow+1 = 2+1] 3992 j = {0,0,2} [size = nz = 6] 3993 v = {1,2,3} [size = nz = 6] 3994 3995 Process1 [P1]: rows_owned=[2] 3996 i = {0,3} [size = nrow+1 = 1+1] 3997 j = {0,1,2} [size = nz = 6] 3998 v = {4,5,6} [size = nz = 6] 3999 4000 .keywords: matrix, aij, compressed row, sparse, parallel 4001 4002 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4003 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4004 @*/ 4005 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4006 { 4007 PetscErrorCode ierr; 4008 4009 PetscFunctionBegin; 4010 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4011 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4012 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4013 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4014 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4015 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4016 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4017 PetscFunctionReturn(0); 4018 } 4019 4020 #undef __FUNCT__ 4021 #define __FUNCT__ "MatCreateAIJ" 4022 /*@C 4023 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4024 (the default parallel PETSc format). For good matrix assembly performance 4025 the user should preallocate the matrix storage by setting the parameters 4026 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4027 performance can be increased by more than a factor of 50. 4028 4029 Collective on MPI_Comm 4030 4031 Input Parameters: 4032 + comm - MPI communicator 4033 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4034 This value should be the same as the local size used in creating the 4035 y vector for the matrix-vector product y = Ax. 4036 . n - This value should be the same as the local size used in creating the 4037 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4038 calculated if N is given) For square matrices n is almost always m. 4039 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4040 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4041 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4042 (same value is used for all local rows) 4043 . d_nnz - array containing the number of nonzeros in the various rows of the 4044 DIAGONAL portion of the local submatrix (possibly different for each row) 4045 or NULL, if d_nz is used to specify the nonzero structure. 4046 The size of this array is equal to the number of local rows, i.e 'm'. 4047 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4048 submatrix (same value is used for all local rows). 4049 - o_nnz - array containing the number of nonzeros in the various rows of the 4050 OFF-DIAGONAL portion of the local submatrix (possibly different for 4051 each row) or NULL, if o_nz is used to specify the nonzero 4052 structure. 
The size of this array is equal to the number 4053 of local rows, i.e 'm'. 4054 4055 Output Parameter: 4056 . A - the matrix 4057 4058 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4059 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4060 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4061 4062 Notes: 4063 If the *_nnz parameter is given then the *_nz parameter is ignored 4064 4065 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4066 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4067 storage requirements for this matrix. 4068 4069 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4070 processor than it must be used on all processors that share the object for 4071 that argument. 4072 4073 The user MUST specify either the local or global matrix dimensions 4074 (possibly both). 4075 4076 The parallel matrix is partitioned across processors such that the 4077 first m0 rows belong to process 0, the next m1 rows belong to 4078 process 1, the next m2 rows belong to process 2 etc.. where 4079 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4080 values corresponding to [m x N] submatrix. 4081 4082 The columns are logically partitioned with the n0 columns belonging 4083 to 0th partition, the next n1 columns belonging to the next 4084 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4085 4086 The DIAGONAL portion of the local submatrix on any given processor 4087 is the submatrix corresponding to the rows and columns m,n 4088 corresponding to the given processor. i.e diagonal matrix on 4089 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4090 etc. The remaining portion of the local submatrix [m x (N-n)] 4091 constitute the OFF-DIAGONAL portion. The example below better 4092 illustrates this concept. 4093 4094 For a square global matrix we define each processor's diagonal portion 4095 to be its local rows and the corresponding columns (a square submatrix); 4096 each processor's off-diagonal portion encompasses the remainder of the 4097 local matrix (a rectangular submatrix). 4098 4099 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4100 4101 When calling this routine with a single process communicator, a matrix of 4102 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4103 type of communicator, use the construction mechanism: 4104 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4105 4106 By default, this format uses inodes (identical nodes) when possible. 4107 We search for consecutive rows with the same nonzero structure, thereby 4108 reusing matrix information to achieve increased efficiency. 4109 4110 Options Database Keys: 4111 + -mat_no_inode - Do not use inodes 4112 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4113 - -mat_aij_oneindex - Internally use indexing starting at 1 4114 rather than 0. Note that when calling MatSetValues(), 4115 the user still MUST index entries starting at 0! 4116 4117 4118 Example usage: 4119 4120 Consider the following 8x8 matrix with 34 non-zero values, that is 4121 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4122 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4123 as follows: 4124 4125 .vb 4126 1 2 0 | 0 3 0 | 0 4 4127 Proc0 0 5 6 | 7 0 0 | 8 0 4128 9 0 10 | 11 0 0 | 12 0 4129 ------------------------------------- 4130 13 0 14 | 15 16 17 | 0 0 4131 Proc1 0 18 0 | 19 20 21 | 0 0 4132 0 0 0 | 22 23 0 | 24 0 4133 ------------------------------------- 4134 Proc2 25 26 27 | 0 0 28 | 29 0 4135 30 0 0 | 31 32 33 | 0 34 4136 .ve 4137 4138 This can be represented as a collection of submatrices as: 4139 4140 .vb 4141 A B C 4142 D E F 4143 G H I 4144 .ve 4145 4146 Where the submatrices A,B,C are owned by proc0, D,E,F are 4147 owned by proc1, G,H,I are owned by proc2. 4148 4149 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4150 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4151 The 'M','N' parameters are 8,8, and have the same values on all procs. 4152 4153 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4154 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4155 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4156 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4157 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4158 matrix, ans [DF] as another SeqAIJ matrix. 4159 4160 When d_nz, o_nz parameters are specified, d_nz storage elements are 4161 allocated for every row of the local diagonal submatrix, and o_nz 4162 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4163 One way to choose d_nz and o_nz is to use the max nonzerors per local 4164 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4165 In this case, the values of d_nz,o_nz are: 4166 .vb 4167 proc0 : dnz = 2, o_nz = 2 4168 proc1 : dnz = 3, o_nz = 2 4169 proc2 : dnz = 1, o_nz = 4 4170 .ve 4171 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4172 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4173 for proc3. i.e we are using 12+15+10=37 storage locations to store 4174 34 values. 4175 4176 When d_nnz, o_nnz parameters are specified, the storage is specified 4177 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4178 In the above case the values for d_nnz,o_nnz are: 4179 .vb 4180 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4181 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4182 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4183 .ve 4184 Here the space allocated is sum of all the above values i.e 34, and 4185 hence pre-allocation is perfect. 
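   For the d_nnz/o_nnz case above, the corresponding call on proc0 would look roughly like the
   following sketch (each process passes its own local sizes and arrays; error checking omitted):

.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve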
4186 4187 Level: intermediate 4188 4189 .keywords: matrix, aij, compressed row, sparse, parallel 4190 4191 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4192 MPIAIJ, MatCreateMPIAIJWithArrays() 4193 @*/ 4194 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4195 { 4196 PetscErrorCode ierr; 4197 PetscMPIInt size; 4198 4199 PetscFunctionBegin; 4200 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4201 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4202 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4203 if (size > 1) { 4204 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4205 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4206 } else { 4207 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4208 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4209 } 4210 PetscFunctionReturn(0); 4211 } 4212 4213 #undef __FUNCT__ 4214 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4215 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4216 { 4217 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4218 4219 PetscFunctionBegin; 4220 if (Ad) *Ad = a->A; 4221 if (Ao) *Ao = a->B; 4222 if (colmap) *colmap = a->garray; 4223 PetscFunctionReturn(0); 4224 } 4225 4226 #undef __FUNCT__ 4227 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4228 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4229 { 4230 PetscErrorCode ierr; 4231 PetscInt i; 4232 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4233 4234 PetscFunctionBegin; 4235 if (coloring->ctype == IS_COLORING_GLOBAL) { 4236 ISColoringValue *allcolors,*colors; 4237 ISColoring ocoloring; 4238 4239 /* set coloring for diagonal portion */ 4240 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4241 4242 /* set coloring for off-diagonal portion */ 4243 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4244 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4245 for (i=0; i<a->B->cmap->n; i++) { 4246 colors[i] = allcolors[a->garray[i]]; 4247 } 4248 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4249 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4250 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4251 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4252 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4253 ISColoringValue *colors; 4254 PetscInt *larray; 4255 ISColoring ocoloring; 4256 4257 /* set coloring for diagonal portion */ 4258 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4259 for (i=0; i<a->A->cmap->n; i++) { 4260 larray[i] = i + A->cmap->rstart; 4261 } 4262 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4263 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4264 for (i=0; i<a->A->cmap->n; i++) { 4265 colors[i] = coloring->colors[larray[i]]; 4266 } 4267 ierr = PetscFree(larray);CHKERRQ(ierr); 4268 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4269 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4270 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4271 4272 /* set coloring for off-diagonal portion */ 4273 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4274 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4275 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4276 for (i=0; i<a->B->cmap->n; i++) { 4277 colors[i] = coloring->colors[larray[i]]; 4278 } 4279 ierr = PetscFree(larray);CHKERRQ(ierr); 4280 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4281 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4282 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4283 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4284 PetscFunctionReturn(0); 4285 } 4286 4287 #undef __FUNCT__ 4288 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4289 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4290 { 4291 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4292 PetscErrorCode ierr; 4293 4294 PetscFunctionBegin; 4295 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4296 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4297 PetscFunctionReturn(0); 4298 } 4299 4300 #undef __FUNCT__ 4301 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4302 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4303 { 4304 PetscErrorCode ierr; 4305 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4306 PetscInt *indx; 4307 4308 PetscFunctionBegin; 4309 /* This routine will ONLY return MPIAIJ type matrix */ 4310 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4311 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4312 if (n == PETSC_DECIDE) { 4313 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4314 } 4315 /* Check sum(n) = N */ 4316 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4317 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4318 4319 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4320 rstart -= m; 4321 4322 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4323 for (i=0; i<m; i++) { 4324 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4325 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4326 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4327 } 4328 4329 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4330 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4331 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4332 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4333 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4334 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4335 PetscFunctionReturn(0); 4336 } 4337 4338 #undef __FUNCT__ 4339 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4340 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4341 { 4342 PetscErrorCode ierr; 4343 PetscInt m,N,i,rstart,nnz,Ii; 4344 PetscInt *indx; 4345 PetscScalar *values; 4346 4347 PetscFunctionBegin; 4348 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4349 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4350 for (i=0; i<m; i++) { 4351 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4352 Ii = i + rstart; 4353 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4354 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4355 } 4356 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4357 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4358 PetscFunctionReturn(0); 4359 } 4360 4361 #undef __FUNCT__ 4362 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4363 /*@ 4364 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4365 matrices from each processor 4366 4367 Collective on MPI_Comm 4368 4369 Input Parameters: 4370 + comm - the communicator the parallel matrix will live on 4371 . inmat - the input sequential matrix (one per process) 4372 . n - number of local columns (or PETSC_DECIDE) 4373 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4374 4375 Output Parameter: 4376 . outmat - the parallel matrix generated 4377 4378 Level: advanced 4379 4380 Notes: The number of columns of the matrix on EACH process MUST be the same. 4381 4382 @*/ 4383 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4384 { 4385 PetscErrorCode ierr; 4386 PetscMPIInt size; 4387 4388 PetscFunctionBegin; 4389 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4390 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4391 if (size == 1) { 4392 if (scall == MAT_INITIAL_MATRIX) { 4393 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4394 } else { 4395 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4396 } 4397 } else { 4398 if (scall == MAT_INITIAL_MATRIX) { 4399 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4400 } 4401 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4402 } 4403 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4404 PetscFunctionReturn(0); 4405 } 4406 4407 #undef __FUNCT__ 4408 #define __FUNCT__ "MatFileSplit" 4409 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4410 { 4411 PetscErrorCode ierr; 4412 PetscMPIInt rank; 4413 PetscInt m,N,i,rstart,nnz; 4414 size_t len; 4415 const PetscInt *indx; 4416 PetscViewer out; 4417 char *name; 4418 Mat B; 4419 const PetscScalar *values; 4420 4421 PetscFunctionBegin; 4422 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4423 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4424 /* Should this be the type of the diagonal block of A? 
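As currently written, the per-process piece is always created as MATSEQAIJ just below, independent of the subtype of A. 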
*/ 4425 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4426 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4427 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4428 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4429 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4430 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4431 for (i=0; i<m; i++) { 4432 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4433 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4434 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4435 } 4436 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4437 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4438 4439 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4440 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4441 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4442 sprintf(name,"%s.%d",outfile,rank); 4443 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4444 ierr = PetscFree(name);CHKERRQ(ierr); 4445 ierr = MatView(B,out);CHKERRQ(ierr); 4446 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4447 ierr = MatDestroy(&B);CHKERRQ(ierr); 4448 PetscFunctionReturn(0); 4449 } 4450 4451 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4452 #undef __FUNCT__ 4453 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4454 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4455 { 4456 PetscErrorCode ierr; 4457 Mat_Merge_SeqsToMPI *merge; 4458 PetscContainer container; 4459 4460 PetscFunctionBegin; 4461 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4462 if (container) { 4463 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4464 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4465 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4466 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4467 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4468 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4469 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4470 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4471 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4472 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4473 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4474 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4476 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4477 ierr = PetscFree(merge);CHKERRQ(ierr); 4478 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4479 } 4480 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4481 PetscFunctionReturn(0); 4482 } 4483 4484 #include <../src/mat/utils/freespace.h> 4485 #include <petscbt.h> 4486 4487 #undef __FUNCT__ 4488 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4489 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4490 { 4491 PetscErrorCode ierr; 4492 MPI_Comm comm; 4493 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4494 PetscMPIInt size,rank,taga,*len_s; 4495 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4496 PetscInt proc,m; 4497 PetscInt **buf_ri,**buf_rj; 4498 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4499 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4500 MPI_Request *s_waits,*r_waits; 4501 MPI_Status *status; 4502 MatScalar *aa=a->a; 4503 MatScalar **abuf_r,*ba_i; 4504 Mat_Merge_SeqsToMPI *merge; 4505 PetscContainer container; 4506 4507 PetscFunctionBegin; 4508 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4509 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4510 4511 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4512 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4513 4514 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4515 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4516 4517 bi = merge->bi; 4518 bj = merge->bj; 4519 buf_ri = merge->buf_ri; 4520 buf_rj = merge->buf_rj; 4521 4522 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4523 owners = merge->rowmap->range; 4524 len_s = merge->len_s; 4525 4526 /* send and recv matrix values */ 4527 /*-----------------------------*/ 4528 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4529 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4530 4531 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4532 for (proc=0,k=0; proc<size; proc++) { 4533 if (!len_s[proc]) continue; 4534 i = owners[proc]; 4535 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4536 k++; 4537 } 4538 4539 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4540 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4541 ierr = PetscFree(status);CHKERRQ(ierr); 4542 4543 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4544 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4545 4546 /* insert mat values of mpimat */ 4547 /*----------------------------*/ 4548 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4549 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4550 4551 for (k=0; k<merge->nrecv; k++) { 4552 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4553 nrows = *(buf_ri_k[k]); 4554 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4555 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4556 } 4557 4558 /* set values of ba */ 4559 m = merge->rowmap->n; 4560 for (i=0; i<m; i++) { 4561 arow = owners[rank] + i; 4562 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4563 bnzi = bi[i+1] - bi[i]; 4564 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4565 4566 /* add local non-zero vals of this proc's seqmat into ba */ 4567 anzi = ai[arow+1] - ai[arow]; 4568 aj = a->j + ai[arow]; 4569 aa = a->a + ai[arow]; 4570 nextaj = 0; 4571 for (j=0; nextaj<anzi; j++) { 4572 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4573 ba_i[j] += aa[nextaj++]; 4574 } 4575 } 4576 4577 /* add received vals into ba */ 4578 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4579 /* i-th row */ 4580 if (i == *nextrow[k]) { 4581 anzi = *(nextai[k]+1) - *nextai[k]; 4582 aj = buf_rj[k] + *(nextai[k]); 4583 aa = abuf_r[k] + *(nextai[k]); 4584 nextaj = 0; 4585 for (j=0; nextaj<anzi; j++) { 4586 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4587 ba_i[j] += aa[nextaj++]; 4588 } 4589 } 4590 nextrow[k]++; nextai[k]++; 4591 } 4592 } 4593 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4594 } 4595 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4596 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4597 4598 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4599 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4600 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4601 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4602 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4603 PetscFunctionReturn(0); 4604 } 4605 4606 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4607 4608 #undef __FUNCT__ 4609 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4610 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4611 { 4612 PetscErrorCode ierr; 4613 Mat B_mpi; 4614 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4615 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4616 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4617 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4618 PetscInt len,proc,*dnz,*onz,bs,cbs; 4619 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4620 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4621 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4622 MPI_Status *status; 4623 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4624 PetscBT lnkbt; 4625 Mat_Merge_SeqsToMPI *merge; 4626 PetscContainer container; 4627 4628 PetscFunctionBegin; 4629 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4630 4631 /* make sure it is a PETSc comm */ 4632 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4633 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4634 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4635 4636 ierr = PetscNew(&merge);CHKERRQ(ierr); 4637 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4638 4639 /* determine row ownership */ 4640 /*---------------------------------------------------------*/ 4641 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4642 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4643 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4644 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4645 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4646 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4647 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4648 4649 m = merge->rowmap->n; 4650 owners = merge->rowmap->range; 4651 4652 /* determine the number of messages to send, their lengths */ 4653 /*---------------------------------------------------------*/ 4654 len_s = merge->len_s; 4655 4656 len = 0; /* length of buf_si[] */ 4657 merge->nsend = 0; 4658 for (proc=0; proc<size; proc++) { 4659 len_si[proc] = 0; 4660 if (proc == rank) { 4661 len_s[proc] = 0; 4662 } else { 4663 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4664 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */ 4665 } 4666 if (len_s[proc]) { 4667 merge->nsend++; 4668 nrows = 0; 4669 for (i=owners[proc]; i<owners[proc+1]; i++) { 4670 if (ai[i+1] > ai[i]) nrows++; 4671 } 4672 len_si[proc] = 2*(nrows+1); 4673 len += len_si[proc]; 4674 } 4675 } 4676 4677 /* determine the number and length of messages to receive for ij-structure */ 4678 /*-------------------------------------------------------------------------*/ 4679 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4680 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4681 4682 /* post the Irecv of j-structure */ 4683 /*-------------------------------*/ 4684 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4685 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4686 4687 /* post the Isend of j-structure */ 4688 /*--------------------------------*/ 4689 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4690 4691 for (proc=0, k=0; proc<size; proc++) { 4692 if (!len_s[proc]) continue; 4693 i = owners[proc]; 4694 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4695 k++; 4696 } 4697 4698 /* receives and sends of j-structure are complete */ 4699 /*------------------------------------------------*/ 4700 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4701 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4702 4703 /* send and recv i-structure */ 4704 /*---------------------------*/ 4705 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4706 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4707 4708 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4709 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4710 for (proc=0,k=0; proc<size; proc++) { 4711 if (!len_s[proc]) continue; 4712 /* form outgoing message for i-structure: 4713 buf_si[0]: nrows to be sent 4714 [1:nrows]: row index (global) 4715 [nrows+1:2*nrows+1]: i-structure index 4716 */ 4717 /*-------------------------------------------*/ 4718 nrows = len_si[proc]/2 - 1; 4719 buf_si_i = buf_si + nrows+1; 4720 buf_si[0] = nrows; 4721 buf_si_i[0] = 0; 4722 nrows = 0; 4723 for (i=owners[proc]; i<owners[proc+1]; i++) { 4724 anzi = ai[i+1] - ai[i]; 4725 if (anzi) { 4726 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4727 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4728 nrows++; 4729 } 4730 } 4731 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4732 k++; 4733 buf_si += len_si[proc]; 4734 } 4735 4736 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4737 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4738 4739 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4740 for (i=0; i<merge->nrecv; i++) { 4741 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4742 } 4743 4744 ierr = PetscFree(len_si);CHKERRQ(ierr); 4745 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4746 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4747 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4748 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4749 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4750 ierr = PetscFree(status);CHKERRQ(ierr); 4751 4752 /* compute a local seq matrix in each processor */ 4753 /*----------------------------------------------*/ 4754 /* allocate bi array and free space for accumulating nonzero column info */ 4755 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4756 bi[0] = 0; 4757 4758 /* create and initialize a linked list */ 4759 nlnk = N+1; 4760 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4761 4762 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4763 len = ai[owners[rank+1]] - ai[owners[rank]]; 4764 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4765 4766 current_space = free_space; 4767 4768 /* determine symbolic info for each local row */ 4769 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4770 4771 for (k=0; k<merge->nrecv; k++) { 4772 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4773 nrows = *buf_ri_k[k]; 4774 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4775 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4776 } 4777 4778 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4779 len = 0; 4780 for (i=0; i<m; i++) { 4781 bnzi = 0; 4782 /* add local non-zero cols of this proc's seqmat into lnk */ 4783 arow = owners[rank] + i; 4784 anzi = ai[arow+1] - ai[arow]; 4785 aj = a->j + ai[arow]; 4786 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4787 bnzi += nlnk; 4788 /* add received col data into lnk */ 4789 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4790 if (i == *nextrow[k]) { /* i-th row */ 4791 anzi = *(nextai[k]+1) - *nextai[k]; 4792 aj = buf_rj[k] + *nextai[k]; 4793 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4794 bnzi += nlnk; 4795 nextrow[k]++; nextai[k]++; 4796 } 4797 } 4798 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4799 4800 /* if free space is not available, make more free space */ 4801 if (current_space->local_remaining<bnzi) { 4802 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4803 nspacedouble++; 4804 } 4805 /* copy data into free space, then initialize lnk */ 4806 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4807 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4808 4809 current_space->array += bnzi; 4810 current_space->local_used += bnzi; 4811 current_space->local_remaining -= bnzi; 4812 4813 bi[i+1] = bi[i] + bnzi; 4814 } 4815 4816 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4817 4818 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4819 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4820 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4821 4822 /* create symbolic parallel matrix B_mpi */ 4823 /*---------------------------------------*/ 4824 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4825 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4826 if (n==PETSC_DECIDE) { 4827 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4828 } else { 4829 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4830 } 4831 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4832 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4833 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4834 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4835 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4836 4837 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4838 B_mpi->assembled = PETSC_FALSE; 4839 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4840 merge->bi = bi; 4841 merge->bj = bj; 4842 merge->buf_ri = buf_ri; 4843 merge->buf_rj = buf_rj; 4844 merge->coi = NULL; 4845 merge->coj = NULL; 4846 merge->owners_co = NULL; 4847 4848 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4849 4850 /* attach the supporting struct to B_mpi for reuse */ 4851 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4852 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4853 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4854 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4855 *mpimat = B_mpi; 4856 4857 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4858 PetscFunctionReturn(0); 4859 } 4860 4861 #undef __FUNCT__ 4862 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4863 /*@C 4864 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4865 matrices from each processor 4866 4867 Collective on MPI_Comm 4868 4869 Input Parameters: 4870 + comm - the communicator the parallel matrix will live on 4871 . seqmat - the input sequential matrix (one per process) 4872 . m - number of local rows (or PETSC_DECIDE) 4873 . n - number of local columns (or PETSC_DECIDE) 4874 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4875 4876 Output Parameter: 4877 . mpimat - the parallel matrix generated 4878 4879 Level: advanced 4880 4881 Notes: 4882 The dimensions of the sequential matrix on each process MUST be the same. 4883 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4884 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4885 @*/ 4886 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4887 { 4888 PetscErrorCode ierr; 4889 PetscMPIInt size; 4890 4891 PetscFunctionBegin; 4892 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4893 if (size == 1) { 4894 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4895 if (scall == MAT_INITIAL_MATRIX) { 4896 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4897 } else { 4898 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4899 } 4900 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4901 PetscFunctionReturn(0); 4902 } 4903 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4904 if (scall == MAT_INITIAL_MATRIX) { 4905 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4906 } 4907 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4908 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4909 PetscFunctionReturn(0); 4910 } 4911 4912 #undef __FUNCT__ 4913 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4914 /*@ 4915 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4916 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4917 with MatGetSize() 4918 4919 Not Collective 4920 4921 Input Parameters: 4922 + A - the matrix 4923 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4924 4925 Output Parameter: 4926 . 
A_loc - the local sequential matrix generated 4927 4928 Level: developer 4929 4930 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4931 4932 @*/ 4933 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4934 { 4935 PetscErrorCode ierr; 4936 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4937 Mat_SeqAIJ *mat,*a,*b; 4938 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4939 MatScalar *aa,*ba,*cam; 4940 PetscScalar *ca; 4941 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4942 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4943 PetscBool match; 4944 MPI_Comm comm; 4945 PetscMPIInt size; 4946 4947 PetscFunctionBegin; 4948 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4949 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4950 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4951 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4952 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4953 4954 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4955 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4956 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4957 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4958 aa = a->a; ba = b->a; 4959 if (scall == MAT_INITIAL_MATRIX) { 4960 if (size == 1) { 4961 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4962 PetscFunctionReturn(0); 4963 } 4964 4965 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4966 ci[0] = 0; 4967 for (i=0; i<am; i++) { 4968 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4969 } 4970 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4971 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4972 k = 0; 4973 for (i=0; i<am; i++) { 4974 ncols_o = bi[i+1] - bi[i]; 4975 ncols_d = ai[i+1] - ai[i]; 4976 /* off-diagonal portion of A */ 4977 for (jo=0; jo<ncols_o; jo++) { 4978 col = cmap[*bj]; 4979 if (col >= cstart) break; 4980 cj[k] = col; bj++; 4981 ca[k++] = *ba++; 4982 } 4983 /* diagonal portion of A */ 4984 for (j=0; j<ncols_d; j++) { 4985 cj[k] = cstart + *aj++; 4986 ca[k++] = *aa++; 4987 } 4988 /* off-diagonal portion of A */ 4989 for (j=jo; j<ncols_o; j++) { 4990 cj[k] = cmap[*bj++]; 4991 ca[k++] = *ba++; 4992 } 4993 } 4994 /* put together the new matrix */ 4995 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4996 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4997 /* Since these are PETSc arrays, change flags to free them as necessary. 
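Setting free_a and free_ij below hands ownership of ci, cj, and ca to the new sequential matrix, so they are freed when A_loc is destroyed. 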
*/ 4998 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4999 mat->free_a = PETSC_TRUE; 5000 mat->free_ij = PETSC_TRUE; 5001 mat->nonew = 0; 5002 } else if (scall == MAT_REUSE_MATRIX) { 5003 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5004 ci = mat->i; cj = mat->j; cam = mat->a; 5005 for (i=0; i<am; i++) { 5006 /* off-diagonal portion of A */ 5007 ncols_o = bi[i+1] - bi[i]; 5008 for (jo=0; jo<ncols_o; jo++) { 5009 col = cmap[*bj]; 5010 if (col >= cstart) break; 5011 *cam++ = *ba++; bj++; 5012 } 5013 /* diagonal portion of A */ 5014 ncols_d = ai[i+1] - ai[i]; 5015 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5016 /* off-diagonal portion of A */ 5017 for (j=jo; j<ncols_o; j++) { 5018 *cam++ = *ba++; bj++; 5019 } 5020 } 5021 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5022 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5023 PetscFunctionReturn(0); 5024 } 5025 5026 #undef __FUNCT__ 5027 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5028 /*@C 5029 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5030 5031 Not Collective 5032 5033 Input Parameters: 5034 + A - the matrix 5035 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5036 - row, col - index sets of rows and columns to extract (or NULL) 5037 5038 Output Parameter: 5039 . A_loc - the local sequential matrix generated 5040 5041 Level: developer 5042 5043 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5044 5045 @*/ 5046 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5047 { 5048 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5049 PetscErrorCode ierr; 5050 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5051 IS isrowa,iscola; 5052 Mat *aloc; 5053 PetscBool match; 5054 5055 PetscFunctionBegin; 5056 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5057 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5058 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5059 if (!row) { 5060 start = A->rmap->rstart; end = A->rmap->rend; 5061 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5062 } else { 5063 isrowa = *row; 5064 } 5065 if (!col) { 5066 start = A->cmap->rstart; 5067 cmap = a->garray; 5068 nzA = a->A->cmap->n; 5069 nzB = a->B->cmap->n; 5070 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5071 ncols = 0; 5072 for (i=0; i<nzB; i++) { 5073 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5074 else break; 5075 } 5076 imark = i; 5077 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5078 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5079 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5080 } else { 5081 iscola = *col; 5082 } 5083 if (scall != MAT_INITIAL_MATRIX) { 5084 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5085 aloc[0] = *A_loc; 5086 } 5087 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5088 *A_loc = aloc[0]; 5089 ierr = PetscFree(aloc);CHKERRQ(ierr); 5090 if (!row) { 5091 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5092 } 5093 if (!col) { 5094 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5095 } 5096 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5097 PetscFunctionReturn(0); 5098 } 5099 5100 #undef __FUNCT__ 5101 #define __FUNCT__ "MatGetBrowsOfAcols" 5102 /*@C 5103 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 5104 5105 Collective on Mat 5106 5107 Input Parameters: 5108 + A,B - the matrices in mpiaij format 5109 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5110 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5111 5112 Output Parameter: 5113 + rowb, colb - index sets of rows and columns of B to extract 5114 - B_seq - the sequential matrix generated 5115 5116 Level: developer 5117 5118 @*/ 5119 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5120 { 5121 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5122 PetscErrorCode ierr; 5123 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5124 IS isrowb,iscolb; 5125 Mat *bseq=NULL; 5126 5127 PetscFunctionBegin; 5128 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5129 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5130 } 5131 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5132 5133 if (scall == MAT_INITIAL_MATRIX) { 5134 start = A->cmap->rstart; 5135 cmap = a->garray; 5136 nzA = a->A->cmap->n; 5137 nzB = a->B->cmap->n; 5138 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5139 ncols = 0; 5140 for (i=0; i<nzB; i++) { /* row < local row index */ 5141 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5142 else break; 5143 } 5144 imark = i; 5145 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5146 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5147 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5148 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5149 } else { 5150 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5151 isrowb = *rowb; iscolb = *colb; 5152 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5153 bseq[0] = *B_seq; 5154 } 5155 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5156 *B_seq = bseq[0]; 5157 ierr = PetscFree(bseq);CHKERRQ(ierr); 5158 if (!rowb) { 5159 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5160 } else { 5161 *rowb = isrowb; 5162 } 5163 if (!colb) { 5164 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5165 } else { 5166 *colb = iscolb; 5167 } 5168 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5169 PetscFunctionReturn(0); 5170 } 5171 5172 #undef __FUNCT__ 5173 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5174 /* 5175 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5176 of the OFF-DIAGONAL portion of local A 5177 5178 Collective on Mat 5179 5180 Input Parameters: 5181 + A,B - the matrices in mpiaij format 5182 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5183 5184 Output Parameter: 5185 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5186 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5187 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5188 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5189 5190 Level: developer 5191 5192 */ 5193 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5194 { 5195 VecScatter_MPI_General *gen_to,*gen_from; 5196 PetscErrorCode ierr; 5197 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5198 Mat_SeqAIJ *b_oth; 5199 VecScatter ctx =a->Mvctx; 5200 MPI_Comm comm; 5201 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5202 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5203 PetscScalar *rvalues,*svalues; 5204 MatScalar *b_otha,*bufa,*bufA; 5205 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5206 MPI_Request *rwaits = NULL,*swaits = NULL; 5207 MPI_Status *sstatus,rstatus; 5208 PetscMPIInt jj,size; 5209 PetscInt *cols,sbs,rbs; 5210 PetscScalar *vals; 5211 5212 PetscFunctionBegin; 5213 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5214 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5215 if (size == 1) PetscFunctionReturn(0); 5216 5217 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5218 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5219 } 5220 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5221 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5222 5223 gen_to = (VecScatter_MPI_General*)ctx->todata; 5224 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5225 rvalues = gen_from->values; /* holds the length of receiving row */ 5226 svalues = gen_to->values; /* holds the length of sending row */ 5227 nrecvs = gen_from->n; 5228 nsends = gen_to->n; 5229 5230 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5231 srow = gen_to->indices; /* local row index to be sent */ 5232 sstarts = gen_to->starts; 5233 sprocs = gen_to->procs; 5234 sstatus = gen_to->sstatus; 5235 sbs = gen_to->bs; 5236 rstarts = gen_from->starts; 5237 rprocs = gen_from->procs; 5238 rbs = gen_from->bs; 5239 5240 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5241 if (scall == MAT_INITIAL_MATRIX) { 5242 /* i-array */ 5243 /*---------*/ 5244 /* post receives */ 5245 for (i=0; i<nrecvs; i++) { 5246 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5247 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5248 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5249 } 5250 5251 /* pack the outgoing message */ 5252 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5253 5254 sstartsj[0] = 0; 5255 rstartsj[0] = 0; 5256 len = 0; /* total length of j or a array to be sent */ 5257 k = 0; 5258 for (i=0; i<nsends; i++) { 5259 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5260 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5261 for (j=0; j<nrows; j++) { 5262 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5263 for (l=0; l<sbs; l++) { 5264 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5265 5266 rowlen[j*sbs+l] = ncols; 5267 5268 len += ncols; 5269 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5270 } 5271 k++; 5272 } 5273 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5274 5275 
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5276 } 5277 /* recvs and sends of i-array are completed */ 5278 i = nrecvs; 5279 while (i--) { 5280 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5281 } 5282 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5283 5284 /* allocate buffers for sending j and a arrays */ 5285 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5286 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5287 5288 /* create i-array of B_oth */ 5289 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5290 5291 b_othi[0] = 0; 5292 len = 0; /* total length of j or a array to be received */ 5293 k = 0; 5294 for (i=0; i<nrecvs; i++) { 5295 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5296 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5297 for (j=0; j<nrows; j++) { 5298 b_othi[k+1] = b_othi[k] + rowlen[j]; 5299 len += rowlen[j]; k++; 5300 } 5301 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5302 } 5303 5304 /* allocate space for j and a arrays of B_oth */ 5305 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5306 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5307 5308 /* j-array */ 5309 /*---------*/ 5310 /* post receives of j-array */ 5311 for (i=0; i<nrecvs; i++) { 5312 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5313 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5314 } 5315 5316 /* pack the outgoing message j-array */ 5317 k = 0; 5318 for (i=0; i<nsends; i++) { 5319 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5320 bufJ = bufj+sstartsj[i]; 5321 for (j=0; j<nrows; j++) { 5322 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5323 for (ll=0; ll<sbs; ll++) { 5324 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5325 for (l=0; l<ncols; l++) { 5326 *bufJ++ = cols[l]; 5327 } 5328 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5329 } 5330 } 5331 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5332 } 5333 5334 /* recvs and sends of j-array are completed */ 5335 i = nrecvs; 5336 while (i--) { 5337 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5338 } 5339 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5340 } else if (scall == MAT_REUSE_MATRIX) { 5341 sstartsj = *startsj_s; 5342 rstartsj = *startsj_r; 5343 bufa = *bufa_ptr; 5344 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5345 b_otha = b_oth->a; 5346 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5347 5348 /* a-array */ 5349 /*---------*/ 5350 /* post receives of a-array */ 5351 for (i=0; i<nrecvs; i++) { 5352 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5353 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5354 } 5355 5356 /* pack the outgoing message a-array */ 5357 k = 0; 5358 for (i=0; i<nsends; i++) { 5359 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5360 bufA = bufa+sstartsj[i]; 5361 for (j=0; j<nrows; j++) { 5362 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5363 for (ll=0; ll<sbs; ll++) { 5364 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5365 for (l=0; l<ncols; l++) { 5366 *bufA++ = vals[l]; 5367 } 5368 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5369 } 5370 } 5371 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5372 } 5373 /* recvs and sends of a-array are completed */ 5374 i = nrecvs; 5375 while (i--) { 5376 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5377 } 5378 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5379 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5380 5381 if (scall == MAT_INITIAL_MATRIX) { 5382 /* put together the new matrix */ 5383 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5384 5385 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5386 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5387 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5388 b_oth->free_a = PETSC_TRUE; 5389 b_oth->free_ij = PETSC_TRUE; 5390 b_oth->nonew = 0; 5391 5392 ierr = PetscFree(bufj);CHKERRQ(ierr); 5393 if (!startsj_s || !bufa_ptr) { 5394 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5395 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5396 } else { 5397 *startsj_s = sstartsj; 5398 *startsj_r = rstartsj; 5399 *bufa_ptr = bufa; 5400 } 5401 } 5402 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5403 PetscFunctionReturn(0); 5404 } 5405 5406 #undef __FUNCT__ 5407 #define __FUNCT__ "MatGetCommunicationStructs" 5408 /*@C 5409 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5410 5411 Not Collective 5412 5413 Input Parameters: 5414 . A - The matrix in mpiaij format 5415 5416 Output Parameter: 5417 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5418 . 
colmap - A map from global column index to local index into lvec 5419 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5420 5421 Level: developer 5422 5423 @*/ 5424 #if defined(PETSC_USE_CTABLE) 5425 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5426 #else 5427 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5428 #endif 5429 { 5430 Mat_MPIAIJ *a; 5431 5432 PetscFunctionBegin; 5433 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5434 PetscValidPointer(lvec, 2); 5435 PetscValidPointer(colmap, 3); 5436 PetscValidPointer(multScatter, 4); 5437 a = (Mat_MPIAIJ*) A->data; 5438 if (lvec) *lvec = a->lvec; 5439 if (colmap) *colmap = a->colmap; 5440 if (multScatter) *multScatter = a->Mvctx; 5441 PetscFunctionReturn(0); 5442 } 5443 5444 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5445 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5446 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5447 5448 #undef __FUNCT__ 5449 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5450 /* 5451 Computes (B'*A')' since computing B*A directly is untenable 5452 5453 n p p 5454 ( ) ( ) ( ) 5455 m ( A ) * n ( B ) = m ( C ) 5456 ( ) ( ) ( ) 5457 5458 */ 5459 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5460 { 5461 PetscErrorCode ierr; 5462 Mat At,Bt,Ct; 5463 5464 PetscFunctionBegin; 5465 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5466 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5467 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5468 ierr = MatDestroy(&At);CHKERRQ(ierr); 5469 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5470 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5471 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5472 PetscFunctionReturn(0); 5473 } 5474 5475 #undef __FUNCT__ 5476 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5477 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5478 { 5479 PetscErrorCode ierr; 5480 PetscInt m=A->rmap->n,n=B->cmap->n; 5481 Mat Cmat; 5482 5483 PetscFunctionBegin; 5484 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5485 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5486 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5487 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5488 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5489 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5490 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5491 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5492 5493 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5494 5495 *C = Cmat; 5496 PetscFunctionReturn(0); 5497 } 5498 5499 /* ----------------------------------------------------------------*/ 5500 #undef __FUNCT__ 5501 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5502 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5503 { 5504 PetscErrorCode ierr; 5505 5506 PetscFunctionBegin; 5507 if (scall == MAT_INITIAL_MATRIX) { 5508 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5509 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5510 ierr = 
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5511 } 5512 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5513 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5514 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5515 PetscFunctionReturn(0); 5516 } 5517 5518 #if defined(PETSC_HAVE_MUMPS) 5519 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5520 #endif 5521 #if defined(PETSC_HAVE_PASTIX) 5522 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5523 #endif 5524 #if defined(PETSC_HAVE_SUPERLU_DIST) 5525 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5526 #endif 5527 #if defined(PETSC_HAVE_CLIQUE) 5528 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5529 #endif 5530 5531 /*MC 5532 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5533 5534 Options Database Keys: 5535 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5536 5537 Level: beginner 5538 5539 .seealso: MatCreateAIJ() 5540 M*/ 5541 5542 #undef __FUNCT__ 5543 #define __FUNCT__ "MatCreate_MPIAIJ" 5544 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5545 { 5546 Mat_MPIAIJ *b; 5547 PetscErrorCode ierr; 5548 PetscMPIInt size; 5549 5550 PetscFunctionBegin; 5551 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5552 5553 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5554 B->data = (void*)b; 5555 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5556 B->assembled = PETSC_FALSE; 5557 B->insertmode = NOT_SET_VALUES; 5558 b->size = size; 5559 5560 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5561 5562 /* build cache for off array entries formed */ 5563 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5564 5565 b->donotstash = PETSC_FALSE; 5566 b->colmap = 0; 5567 b->garray = 0; 5568 b->roworiented = PETSC_TRUE; 5569 5570 /* stuff used for matrix vector multiply */ 5571 b->lvec = NULL; 5572 b->Mvctx = NULL; 5573 5574 /* stuff for MatGetRow() */ 5575 b->rowindices = 0; 5576 b->rowvalues = 0; 5577 b->getrowactive = PETSC_FALSE; 5578 5579 /* flexible pointer used in CUSP/CUSPARSE classes */ 5580 b->spptr = NULL; 5581 5582 #if defined(PETSC_HAVE_MUMPS) 5583 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5584 #endif 5585 #if defined(PETSC_HAVE_PASTIX) 5586 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5587 #endif 5588 #if defined(PETSC_HAVE_SUPERLU_DIST) 5589 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5590 #endif 5591 #if defined(PETSC_HAVE_CLIQUE) 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5593 #endif 5594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5598 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5608 PetscFunctionReturn(0); 5609 } 5610 5611 #undef __FUNCT__ 5612 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5613 /*@C 5614 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5615 and "off-diagonal" part of the matrix in CSR format. 5616 5617 Collective on MPI_Comm 5618 5619 Input Parameters: 5620 + comm - MPI communicator 5621 . m - number of local rows (Cannot be PETSC_DECIDE) 5622 . n - This value should be the same as the local size used in creating the 5623 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5624 calculated if N is given) For square matrices n is almost always m. 5625 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5626 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5627 . i - row indices for "diagonal" portion of matrix 5628 . j - column indices 5629 . a - matrix values 5630 . oi - row indices for "off-diagonal" portion of matrix 5631 . oj - column indices 5632 - oa - matrix values 5633 5634 Output Parameter: 5635 . mat - the matrix 5636 5637 Level: advanced 5638 5639 Notes: 5640 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5641 must free the arrays once the matrix has been destroyed and not before. 5642 5643 The i and j indices are 0 based 5644 5645 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5646 5647 This sets local rows and cannot be used to set off-processor values. 5648 5649 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5650 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5651 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5652 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5653 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5654 communication if it is known that only local entries will be set. 
5655 5656 .keywords: matrix, aij, compressed row, sparse, parallel 5657 5658 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5659 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5660 C@*/ 5661 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5662 { 5663 PetscErrorCode ierr; 5664 Mat_MPIAIJ *maij; 5665 5666 PetscFunctionBegin; 5667 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5668 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5669 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5670 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5671 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5672 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5673 maij = (Mat_MPIAIJ*) (*mat)->data; 5674 5675 (*mat)->preallocated = PETSC_TRUE; 5676 5677 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5678 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5679 5680 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5681 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5682 5683 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5684 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5685 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5686 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5687 5688 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5689 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5690 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5691 PetscFunctionReturn(0); 5692 } 5693 5694 /* 5695 Special version for direct calls from Fortran 5696 */ 5697 #include <petsc-private/fortranimpl.h> 5698 5699 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5700 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5701 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5702 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5703 #endif 5704 5705 /* Change these macros so can be used in void function */ 5706 #undef CHKERRQ 5707 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5708 #undef SETERRQ2 5709 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5710 #undef SETERRQ3 5711 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5712 #undef SETERRQ 5713 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5714 5715 #undef __FUNCT__ 5716 #define __FUNCT__ "matsetvaluesmpiaij_" 5717 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5718 { 5719 Mat mat = *mmat; 5720 PetscInt m = *mm, n = *mn; 5721 InsertMode addv = *maddv; 5722 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5723 PetscScalar value; 5724 PetscErrorCode ierr; 5725 5726 MatCheckPreallocated(mat,1); 5727 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5728 5729 #if defined(PETSC_USE_DEBUG) 5730 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5731 #endif 5732 { 5733 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5734 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5735 PetscBool roworiented = aij->roworiented; 5736 5737 /* Some Variables required in the macro */ 5738 Mat A = aij->A; 5739 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5740 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5741 MatScalar *aa = a->a; 5742 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5743 Mat B = aij->B; 5744 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5745 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5746 MatScalar *ba = b->a; 5747 5748 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5749 PetscInt nonew = a->nonew; 5750 MatScalar *ap1,*ap2; 5751 5752 PetscFunctionBegin; 5753 for (i=0; i<m; i++) { 5754 if (im[i] < 0) continue; 5755 #if defined(PETSC_USE_DEBUG) 5756 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5757 #endif 5758 if (im[i] >= rstart && im[i] < rend) { 5759 row = im[i] - rstart; 5760 lastcol1 = -1; 5761 rp1 = aj + ai[row]; 5762 ap1 = aa + ai[row]; 5763 rmax1 = aimax[row]; 5764 nrow1 = ailen[row]; 5765 low1 = 0; 5766 high1 = nrow1; 5767 lastcol2 = -1; 5768 rp2 = bj + bi[row]; 5769 ap2 = ba + bi[row]; 5770 rmax2 = bimax[row]; 5771 nrow2 = bilen[row]; 5772 low2 = 0; 5773 high2 = nrow2; 5774 5775 for (j=0; j<n; j++) { 5776 if (roworiented) value = v[i*n+j]; 5777 else value = v[i+j*m]; 5778 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5779 if (in[j] >= cstart && in[j] < cend) { 5780 col = in[j] - cstart; 5781 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5782 } else if (in[j] < 0) continue; 5783 #if defined(PETSC_USE_DEBUG) 5784 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5785 #endif 5786 else { 5787 if (mat->was_assembled) { 5788 if (!aij->colmap) { 5789 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5790 } 5791 #if defined(PETSC_USE_CTABLE) 5792 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5793 col--; 5794 #else 5795 col = aij->colmap[in[j]] - 1; 5796 #endif 5797 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5798 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5799 col = in[j]; 5800 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5801 B = aij->B; 5802 b = (Mat_SeqAIJ*)B->data; 5803 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5804 rp2 = bj + bi[row]; 5805 ap2 = ba + bi[row]; 5806 rmax2 = bimax[row]; 5807 nrow2 = bilen[row]; 5808 low2 = 0; 5809 high2 = nrow2; 5810 bm = aij->B->rmap->n; 5811 ba = b->a; 5812 } 5813 } else col = in[j]; 5814 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5815 } 5816 } 5817 } else if (!aij->donotstash) { 5818 if (roworiented) { 5819 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5820 } else { 5821 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5822 } 5823 } 5824 } 5825 } 5826 PetscFunctionReturnVoid(); 5827 } 5828 5829
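/*
   For reference, a minimal sketch of the user-level assembly path that the MPIAIJ routines in this file
   support (illustrative only; the global size 100 and the preallocation parameters are placeholders that a
   real application would choose to match its problem):

     Mat         A;
     PetscInt    Istart,Iend,row;
     PetscScalar v = 2.0;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,1,NULL,1,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
     for (row=Istart; row<Iend; row++) {
       ierr = MatSetValues(A,1,&row,1,&row,&v,INSERT_VALUES);CHKERRQ(ierr);  // each process sets only the diagonal of its own rows
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/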