#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petsc-private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
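
/*
   A minimal usage sketch (illustrative only, not part of the library): create a MATAIJ
   matrix and call both preallocation routines as recommended above, so the same code is
   correctly preallocated whether it runs on one process or many.  The global size N and
   the per-row estimates d_nz and o_nz are placeholders chosen by the caller.

     Mat      A;
     PetscInt d_nz = 5, o_nz = 2;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/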

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
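
/*
   Usage sketch for the routine above (illustrative only): partial column sums from the
   local diagonal (A) and off-diagonal (B) blocks are accumulated into a dense work
   array of global length and combined with a single MPI_Allreduce(), so the caller's
   norms array must have one entry per global column.

     PetscReal *norms;
     PetscInt  N;

     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ...
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/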

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
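
/*
   Usage sketch for the routine above (illustrative only): gA is a MATSEQAIJ matrix
   whose numerical content is supplied from process 0, and each rank passes its desired
   number of local rows m.  A second call with MAT_REUSE_MATRIX moves over fresh values
   (same nonzero pattern) without recomputing the layout.

     Mat dA;

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,m,MAT_INITIAL_MATRIX,&dA);CHKERRQ(ierr);
     ... update the values of gA on process 0, keeping its nonzero pattern ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,m,MAT_REUSE_MATRIX,&dA);CHKERRQ(ierr);
*/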

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
b_noinsert: ; \
    bilen[row] = nrow2; \
  }
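
/*
   A minimal standalone sketch (illustrative only, not used by the library) of the
   search-and-insert technique the two macros above implement: bisection narrows the
   window to at most five entries, a linear scan locates the column, and if the column
   is absent the tail of the sorted row is shifted up one slot to make room (the macros
   additionally handle reallocation and the nonew/ignorezeroentries options).

     static void RowInsertSketch(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,
                                 PetscInt col,PetscScalar value)
     {
       PetscInt low = 0,high = *nrow,t,i,ii;

       while (high-low > 5) {
         t = (low+high)/2;
         if (cols[t] > col) high = t;
         else               low  = t;
       }
       for (i=low; i<high; i++) {
         if (cols[i] > col) break;
         if (cols[i] == col) {vals[i] = value; return;}
       }
       for (ii=(*nrow)-1; ii>=i; ii--) {
         cols[ii+1] = cols[ii];
         vals[ii+1] = vals[ii];
       }
       cols[i] = col; vals[i] = value; (*nrow)++;
     }
*/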

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
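
/*
   Layout assumed by MatSetValuesRow_MPIAIJ() above (illustrative): the caller supplies
   the full row in global column order, which for a square MPIAIJ matrix splits as

      v = [ off-diagonal (B) entries left of the diagonal block |
            diagonal block (A) entries |
            off-diagonal (B) entries right of the diagonal block ]

   so the three PetscMemcpy() calls copy the left and right pieces into B and the
   middle piece into A.
*/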

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
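
/*
   Usage sketch (illustrative only): MatSetValues_MPIAIJ() above stashes any entry whose
   row is owned by another process; such entries reach their owner only during assembly,
   so every rank's insertions must be followed by the collective assembly pair.

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);   (grow may be off-process)
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/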

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
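
/*
   Note on the MPI_BOR reduction above (illustrative): InsertMode is a small enum in
   which INSERT_VALUES and ADD_VALUES have distinct bit patterns, so OR-ing the modes of
   all processes yields exactly ADD_VALUES|INSERT_VALUES when some processes inserted
   while others added, which is then reported as an error; a process with no cached
   entries contributes a zero and simply adopts the others' mode.
*/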

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
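
/*
   Note on the nonzerostate reductions used in MatAssemblyEnd_MPIAIJ() above and in the
   MatZeroRows* routines below: each process contributes the sum of the nonzero states
   of its local A and B blocks, and the MPI_Allreduce() produces a single global state,
   so every process observes a change in the parallel matrix whenever any process
   changed its local nonzero pattern.
*/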

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*)A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank  = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
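
/*
   A minimal sketch (illustrative only; owner_of() stands for the PetscLayoutFindOwner()
   lookup) of the PetscSF pattern used above and in MatZeroRowsColumns_MPIAIJ() below:
   each requested global row becomes a leaf pointing at the (rank, local index) root
   that owns it, and a reduction with MPI_LOR marks the owned rows that any process
   asked to have zeroed.

     PetscSF     sf;
     PetscSFNode *remote;                      one entry per requested row
     ...
     remote[r].rank  = owner_of(rows[r]);
     remote[r].index = rows[r] - owners[remote[r].rank];
     ...
     ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraph(sf,nlocalrows,nrequested,NULL,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
     ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_LOR);CHKERRQ(ierr);
     ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_LOR);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/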

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
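
/*
   The multiply kernels above all follow the same communication/computation overlap
   pattern; schematically (illustrative notation, with A_d the local diagonal block,
   B_o the off-diagonal block, and x_g the ghost values gathered into lvec):

     VecScatterBegin(Mvctx, x -> lvec)     start gathering x_g
     y  = A_d * x_local                    local work hides the communication
     VecScatterEnd(Mvctx, x -> lvec)       finish gathering x_g
     y += B_o * x_g
*/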

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
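
/*
   Layout of the binary file written by MatView_MPIAIJ_Binary() above (for reference;
   every block is written with PetscBinaryWrite()):

     header[4]        MAT_FILE_CLASSID, global rows M, global cols N, total nonzeros
     row lengths      M PetscInt values, gathered rank by rank under flow control
     column indices   one block of global column indices per rank, in row order
     values           the matching numerical values, in the same order
*/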

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr =
             (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
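  /*
     Added commentary, not in the original source: the PetscSFReduce calls in this routine are what invert
     the permutations in parallel.  For the column SF built just above, each process owns leaves i = 0..n-1
     with work[i] = A->cmap->rstart + i (its own global column numbers), and leaf i is attached to the root
     cwant[i].  Reducing with MPIU_REPLACE below therefore deposits each column's new global index at the
     process that owns the old index, without ever gathering the whole permutation on one process.  A small
     illustrative example, assuming two processes and a hypothetical involution cwant = {1,0} on rank 0 and
     cwant = {3,2} on rank 1:

        rank 0: work = {0,1}  ->  cdest = {1,0}
        rank 1: work = {2,3}  ->  cdest = {3,2}
  */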
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr =
         MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set =
        PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt    *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
     */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIAIJ"
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                     /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++;   /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;               /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
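  /*
     Added commentary, not in the original source: the three branches above trade generality for speed.
     With SAME_NONZERO_PATTERN the update reduces to two local BLASaxpy() calls on the stored values of
     the diagonal and off-diagonal blocks; with SUBSET_NONZERO_PATTERN the generic MatAXPY_Basic()
     insertion path is used; otherwise a matrix with the merged nonzero pattern is preallocated and then
     replaces Y via MatHeaderReplace().  A caller reaches this routine through the public interface, for
     example

        ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  */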
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PBGL)

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
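  /*
     Added commentary, not in the original source: the factored data computed by ilu_permuted() lives in
     the boost level graph rather than in any AIJ data structure, so the graph pointer is stashed on the
     factor matrix inside a PetscContainer composed under the key "graph" (see PetscObjectCompose() below);
     MatSolve_MPIAIJ() queries the same key to retrieve it when a triangular solve is requested.
  */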
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type    *lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b, x);CHKERRQ(ierr);

  PetscScalar *array_x;
  ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x, &sx);CHKERRQ(ierr);

  PetscScalar *array_b;
  ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b, &sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
  array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]);
  array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
                                       boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph));
  gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph));

  ilu_set_solve(*lgraph_p, vector_b, vector_x);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr =
         VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr
       = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIAIJ"
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       MatSolve_MPIAIJ,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       0,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       MatSetColoring_MPIAIJ,
                                       0,
                                       MatSetValuesAdifor_MPIAIJ,
                                /*75*/ MatFDColoringApply_AIJ,
                                       0,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       0,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesParallel_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*139*/0,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices.
    */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr =
         PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset=1;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
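  /*
     Added commentary, not in the original source: only process 0 reads the file.  It keeps its own row
     lengths in ourlens and, for every other process, reads that process's block of row lengths into a
     scratch buffer of size mmax and ships it with MPIULong_Send(); the receiving processes block in
     MPIULong_Recv().  The same read-and-forward pattern is repeated below for the column indices and for
     the numerical values, so the complete matrix is never held on a single process.
  */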
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
nz = procsnz[i]; 3017 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3018 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3019 } 3020 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3021 } else { 3022 /* receive numeric values */ 3023 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3024 3025 /* receive message of values */ 3026 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3027 3028 /* insert into matrix */ 3029 jj = rstart; 3030 smycols = mycols; 3031 svals = vals; 3032 for (i=0; i<m; i++) { 3033 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3034 smycols += ourlens[i]; 3035 svals += ourlens[i]; 3036 jj++; 3037 } 3038 } 3039 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3040 ierr = PetscFree(vals);CHKERRQ(ierr); 3041 ierr = PetscFree(mycols);CHKERRQ(ierr); 3042 ierr = PetscFree(rowners);CHKERRQ(ierr); 3043 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3044 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3045 PetscFunctionReturn(0); 3046 } 3047 3048 #undef __FUNCT__ 3049 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3050 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3051 { 3052 PetscErrorCode ierr; 3053 IS iscol_local; 3054 PetscInt csize; 3055 3056 PetscFunctionBegin; 3057 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3058 if (call == MAT_REUSE_MATRIX) { 3059 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3060 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3061 } else { 3062 PetscInt cbs; 3063 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3064 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3065 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3066 } 3067 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3068 if (call == MAT_INITIAL_MATRIX) { 3069 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3070 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3071 } 3072 PetscFunctionReturn(0); 3073 } 3074 3075 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3078 /* 3079 Not great since it makes two copies of the submatrix: first a SeqAIJ 3080 on each process, and then the end result by concatenating the local matrices. 3081 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3082 3083 Note: This requires a sequential iscol with all indices.
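(MatGetSubMatrix_MPIAIJ() builds such an iscol by calling ISAllGather() on the distributed column index set.)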
3084 */ 3085 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3086 { 3087 PetscErrorCode ierr; 3088 PetscMPIInt rank,size; 3089 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3090 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3091 PetscBool allcolumns, colflag; 3092 Mat M,Mreuse; 3093 MatScalar *vwork,*aa; 3094 MPI_Comm comm; 3095 Mat_SeqAIJ *aij; 3096 3097 PetscFunctionBegin; 3098 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3099 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3101 3102 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3103 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3104 if (colflag && ncol == mat->cmap->N) { 3105 allcolumns = PETSC_TRUE; 3106 } else { 3107 allcolumns = PETSC_FALSE; 3108 } 3109 if (call == MAT_REUSE_MATRIX) { 3110 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3111 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3112 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3113 } else { 3114 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3115 } 3116 3117 /* 3118 m - number of local rows 3119 n - number of columns (same on all processors) 3120 rstart - first row in new global matrix generated 3121 */ 3122 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3123 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3124 if (call == MAT_INITIAL_MATRIX) { 3125 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3126 ii = aij->i; 3127 jj = aij->j; 3128 3129 /* 3130 Determine the number of non-zeros in the diagonal and off-diagonal 3131 portions of the matrix in order to do correct preallocation 3132 */ 3133 3134 /* first get start and end of "diagonal" columns */ 3135 if (csize == PETSC_DECIDE) { 3136 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3137 if (mglobal == n) { /* square matrix */ 3138 nlocal = m; 3139 } else { 3140 nlocal = n/size + ((n % size) > rank); 3141 } 3142 } else { 3143 nlocal = csize; 3144 } 3145 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3146 rstart = rend - nlocal; 3147 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3148 3149 /* next, compute all the lengths */ 3150 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3151 olens = dlens + m; 3152 for (i=0; i<m; i++) { 3153 jend = ii[i+1] - ii[i]; 3154 olen = 0; 3155 dlen = 0; 3156 for (j=0; j<jend; j++) { 3157 if (*jj < rstart || *jj >= rend) olen++; 3158 else dlen++; 3159 jj++; 3160 } 3161 olens[i] = olen; 3162 dlens[i] = dlen; 3163 } 3164 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3165 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3166 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3167 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3168 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3169 ierr = PetscFree(dlens);CHKERRQ(ierr); 3170 } else { 3171 PetscInt ml,nl; 3172 3173 M = *newmat; 3174 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3175 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3176 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3177 /* 3178 The next two 
lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3179 rather than the slower MatSetValues(). 3180 */ 3181 M->was_assembled = PETSC_TRUE; 3182 M->assembled = PETSC_FALSE; 3183 } 3184 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3185 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3186 ii = aij->i; 3187 jj = aij->j; 3188 aa = aij->a; 3189 for (i=0; i<m; i++) { 3190 row = rstart + i; 3191 nz = ii[i+1] - ii[i]; 3192 cwork = jj; jj += nz; 3193 vwork = aa; aa += nz; 3194 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3195 } 3196 3197 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3198 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3199 *newmat = M; 3200 3201 /* save submatrix used in processor for next request */ 3202 if (call == MAT_INITIAL_MATRIX) { 3203 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3204 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3205 } 3206 PetscFunctionReturn(0); 3207 } 3208 3209 #undef __FUNCT__ 3210 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3211 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3212 { 3213 PetscInt m,cstart, cend,j,nnz,i,d; 3214 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3215 const PetscInt *JJ; 3216 PetscScalar *values; 3217 PetscErrorCode ierr; 3218 3219 PetscFunctionBegin; 3220 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3221 3222 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3223 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3224 m = B->rmap->n; 3225 cstart = B->cmap->rstart; 3226 cend = B->cmap->rend; 3227 rstart = B->rmap->rstart; 3228 3229 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3230 3231 #if defined(PETSC_USE_DEBUG) 3232 for (i=0; i<m; i++) { 3233 nnz = Ii[i+1]- Ii[i]; 3234 JJ = J + Ii[i]; 3235 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3236 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3237 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3238 } 3239 #endif 3240 3241 for (i=0; i<m; i++) { 3242 nnz = Ii[i+1]- Ii[i]; 3243 JJ = J + Ii[i]; 3244 nnz_max = PetscMax(nnz_max,nnz); 3245 d = 0; 3246 for (j=0; j<nnz; j++) { 3247 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3248 } 3249 d_nnz[i] = d; 3250 o_nnz[i] = nnz - d; 3251 } 3252 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3253 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3254 3255 if (v) values = (PetscScalar*)v; 3256 else { 3257 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3258 } 3259 3260 for (i=0; i<m; i++) { 3261 ii = i + rstart; 3262 nnz = Ii[i+1]- Ii[i]; 3263 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3264 } 3265 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3266 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3267 3268 if (!v) { 3269 ierr = PetscFree(values);CHKERRQ(ierr); 3270 } 3271 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3272 PetscFunctionReturn(0); 3273 } 3274 3275 #undef __FUNCT__ 3276 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3277 /*@ 3278 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3279 (the default parallel PETSc format). 3280 3281 Collective on MPI_Comm 3282 3283 Input Parameters: 3284 + B - the matrix 3285 . i - the indices into j for the start of each local row (starts with zero) 3286 . j - the column indices for each local row (starts with zero) 3287 - v - optional values in the matrix 3288 3289 Level: developer 3290 3291 Notes: 3292 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3293 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3294 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3295 3296 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 3297 3298 The format used for the sparse matrix input is equivalent to a 3299 row-major ordering, i.e., for the following matrix, the input data expected is 3300 as shown: 3301 3302 1 0 0 3303 2 0 3 P0 3304 ------- 3305 4 5 6 P1 3306 3307 Process0 [P0]: rows_owned=[0,1] 3308 i = {0,1,3} [size = nrow+1 = 2+1] 3309 j = {0,0,2} [size = nz = 3] 3310 v = {1,2,3} [size = nz = 3] 3311 3312 Process1 [P1]: rows_owned=[2] 3313 i = {0,3} [size = nrow+1 = 1+1] 3314 j = {0,1,2} [size = nz = 3] 3315 v = {4,5,6} [size = nz = 3] 3316 3317 .keywords: matrix, aij, compressed row, sparse, parallel 3318 3319 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3320 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3321 @*/ 3322 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3323 { 3324 PetscErrorCode ierr; 3325 3326 PetscFunctionBegin; 3327 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3328 PetscFunctionReturn(0); 3329 } 3330 3331 #undef __FUNCT__ 3332 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3333 /*@C 3334 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3335 (the default parallel PETSc format). For good matrix assembly performance 3336 the user should preallocate the matrix storage by setting the parameters 3337 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3338 performance can be increased by more than a factor of 50. 3339 3340 Collective on MPI_Comm 3341 3342 Input Parameters: 3343 + B - the matrix 3344 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3345 (same value is used for all local rows) 3346 . d_nnz - array containing the number of nonzeros in the various rows of the 3347 DIAGONAL portion of the local submatrix (possibly different for each row) 3348 or NULL, if d_nz is used to specify the nonzero structure. 3349 The size of this array is equal to the number of local rows, i.e., 'm'.
3350 For matrices that will be factored, you must leave room for (and set) 3351 the diagonal entry even if it is zero. 3352 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3353 submatrix (same value is used for all local rows). 3354 - o_nnz - array containing the number of nonzeros in the various rows of the 3355 OFF-DIAGONAL portion of the local submatrix (possibly different for 3356 each row) or NULL, if o_nz is used to specify the nonzero 3357 structure. The size of this array is equal to the number 3358 of local rows, i.e., 'm'. 3359 3360 If the *_nnz parameter is given then the *_nz parameter is ignored 3361 3362 The AIJ format (also called the Yale sparse matrix format or 3363 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3364 storage. The stored row and column indices begin with zero. 3365 See Users-Manual: ch_mat for details. 3366 3367 The parallel matrix is partitioned such that the first m0 rows belong to 3368 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3369 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 3370 3371 The DIAGONAL portion of the local submatrix of a processor can be defined 3372 as the submatrix which is obtained by extracting the part corresponding to 3373 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3374 first row that belongs to the processor, r2 is the last row belonging to 3375 this processor, and c1-c2 is the range of indices of the local part of a 3376 vector suitable for applying the matrix to. This is an mxn matrix. In the 3377 common case of a square matrix, the row and column ranges are the same and 3378 the DIAGONAL part is also square. The remaining portion of the local 3379 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 3380 3381 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 3382 3383 You can call MatGetInfo() to get information on how effective the preallocation was; 3384 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3385 You can also run with the option -info and look for messages with the string 3386 malloc in them to see if additional memory allocation was needed. 3387 3388 Example usage: 3389 3390 Consider the following 8x8 matrix with 34 non-zero values, that is 3391 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3392 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3393 as follows: 3394 3395 .vb 3396 1 2 0 | 0 3 0 | 0 4 3397 Proc0 0 5 6 | 7 0 0 | 8 0 3398 9 0 10 | 11 0 0 | 12 0 3399 ------------------------------------- 3400 13 0 14 | 15 16 17 | 0 0 3401 Proc1 0 18 0 | 19 20 21 | 0 0 3402 0 0 0 | 22 23 0 | 24 0 3403 ------------------------------------- 3404 Proc2 25 26 27 | 0 0 28 | 29 0 3405 30 0 0 | 31 32 33 | 0 34 3406 .ve 3407 3408 This can be represented as a collection of submatrices as: 3409 3410 .vb 3411 A B C 3412 D E F 3413 G H I 3414 .ve 3415 3416 Here the submatrices A,B,C are owned by proc0, D,E,F are 3417 owned by proc1, and G,H,I are owned by proc2. 3418 3419 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3420 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3421 The 'M','N' parameters are 8,8, and have the same values on all procs. 3422 3423 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3424 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3425 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3426 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3427 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 3428 matrix and [DF] as another SeqAIJ matrix. 3429 3430 When d_nz, o_nz parameters are specified, d_nz storage elements are 3431 allocated for every row of the local diagonal submatrix, and o_nz 3432 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3433 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3434 row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 3435 In this case, the values of d_nz,o_nz are: 3436 .vb 3437 proc0 : d_nz = 2, o_nz = 2 3438 proc1 : d_nz = 3, o_nz = 2 3439 proc2 : d_nz = 1, o_nz = 4 3440 .ve 3441 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3442 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3443 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3444 34 values. 3445 3446 When d_nnz, o_nnz parameters are specified, the storage is specified 3447 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3448 In the above case the values for d_nnz,o_nnz are: 3449 .vb 3450 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3451 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3452 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3453 .ve 3454 Here the space allocated is the sum of all the above values, i.e., 34, and 3455 hence the preallocation is perfect. 3456 3457 Level: intermediate 3458 3459 .keywords: matrix, aij, compressed row, sparse, parallel 3460 3461 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3462 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3463 @*/ 3464 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3465 { 3466 PetscErrorCode ierr; 3467 3468 PetscFunctionBegin; 3469 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3470 PetscValidType(B,1); 3471 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3472 PetscFunctionReturn(0); 3473 } 3474 3475 #undef __FUNCT__ 3476 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3477 /*@ 3478 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the 3479 local rows in standard CSR format. 3480 3481 Collective on MPI_Comm 3482 3483 Input Parameters: 3484 + comm - MPI communicator 3485 . m - number of local rows (Cannot be PETSC_DECIDE) 3486 . n - This value should be the same as the local size used in creating the 3487 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3488 it calculated if N is given) For square matrices n is almost always m. 3489 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3490 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3491 . i - row indices 3492 . j - column indices 3493 - a - matrix values 3494 3495 Output Parameter: 3496 . mat - the matrix 3497 3498 Level: intermediate 3499 3500 Notes: 3501 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3502 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3503 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3504 3505 The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3506 3507 The format used for the sparse matrix input is equivalent to a 3508 row-major ordering, i.e., for the following matrix, the input data expected is 3509 as shown: 3510 3511 1 0 0 3512 2 0 3 P0 3513 ------- 3514 4 5 6 P1 3515 3516 Process0 [P0]: rows_owned=[0,1] 3517 i = {0,1,3} [size = nrow+1 = 2+1] 3518 j = {0,0,2} [size = nz = 3] 3519 v = {1,2,3} [size = nz = 3] 3520 3521 Process1 [P1]: rows_owned=[2] 3522 i = {0,3} [size = nrow+1 = 1+1] 3523 j = {0,1,2} [size = nz = 3] 3524 v = {4,5,6} [size = nz = 3] 3525 3526 .keywords: matrix, aij, compressed row, sparse, parallel 3527 3528 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3529 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3530 @*/ 3531 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3532 { 3533 PetscErrorCode ierr; 3534 3535 PetscFunctionBegin; 3536 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3537 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3538 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3539 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3540 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3541 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3542 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3543 PetscFunctionReturn(0); 3544 } 3545 3546 #undef __FUNCT__ 3547 #define __FUNCT__ "MatCreateAIJ" 3548 /*@C 3549 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3550 (the default parallel PETSc format). For good matrix assembly performance 3551 the user should preallocate the matrix storage by setting the parameters 3552 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3553 performance can be increased by more than a factor of 50. 3554 3555 Collective on MPI_Comm 3556 3557 Input Parameters: 3558 + comm - MPI communicator 3559 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 3560 This value should be the same as the local size used in creating the 3561 y vector for the matrix-vector product y = Ax. 3562 . n - This value should be the same as the local size used in creating the 3563 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3564 it calculated if N is given) For square matrices n is almost always m. 3565 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3566 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3567 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3568 (same value is used for all local rows) 3569 . d_nnz - array containing the number of nonzeros in the various rows of the 3570 DIAGONAL portion of the local submatrix (possibly different for each row) 3571 or NULL, if d_nz is used to specify the nonzero structure. 3572 The size of this array is equal to the number of local rows, i.e., 'm'. 3573 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3574 submatrix (same value is used for all local rows). 3575 - o_nnz - array containing the number of nonzeros in the various rows of the 3576 OFF-DIAGONAL portion of the local submatrix (possibly different for 3577 each row) or NULL, if o_nz is used to specify the nonzero 3578 structure.
The size of this array is equal to the number 3579 of local rows, i.e., 'm'. 3580 3581 Output Parameter: 3582 . A - the matrix 3583 3584 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3585 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3586 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3587 3588 Notes: 3589 If the *_nnz parameter is given then the *_nz parameter is ignored 3590 3591 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3592 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3593 storage requirements for this matrix. 3594 3595 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3596 processor then it must be used on all processors that share the object for 3597 that argument. 3598 3599 The user MUST specify either the local or global matrix dimensions 3600 (possibly both). 3601 3602 The parallel matrix is partitioned across processors such that the 3603 first m0 rows belong to process 0, the next m1 rows belong to 3604 process 1, the next m2 rows belong to process 2, etc., where 3605 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores 3606 values corresponding to an [m x N] submatrix. 3607 3608 The columns are logically partitioned with the n0 columns belonging 3609 to the 0th partition, the next n1 columns belonging to the next 3610 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 3611 3612 The DIAGONAL portion of the local submatrix on any given processor 3613 is the submatrix corresponding to the rows and columns m,n 3614 owned by the given processor, i.e., the diagonal submatrix on 3615 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 3616 etc. The remaining portion of the local submatrix [m x (N-n)] 3617 constitutes the OFF-DIAGONAL portion. The example below better 3618 illustrates this concept. 3619 3620 For a square global matrix we define each processor's diagonal portion 3621 to be its local rows and the corresponding columns (a square submatrix); 3622 each processor's off-diagonal portion encompasses the remainder of the 3623 local matrix (a rectangular submatrix). 3624 3625 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 3626 3627 When calling this routine with a single process communicator, a matrix of 3628 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3629 type of communicator, use the construction mechanism: 3630 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3631 3632 By default, this format uses inodes (identical nodes) when possible. 3633 We search for consecutive rows with the same nonzero structure, thereby 3634 reusing matrix information to achieve increased efficiency. 3635 3636 Options Database Keys: 3637 + -mat_no_inode - Do not use inodes 3638 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3639 - -mat_aij_oneindex - Internally use indexing starting at 1 3640 rather than 0. Note that when calling MatSetValues(), 3641 the user still MUST index entries starting at 0! 3642 3643 3644 Example usage: 3645 3646 Consider the following 8x8 matrix with 34 non-zero values, that is 3647 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3648 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3649 as follows: 3650 3651 .vb 3652 1 2 0 | 0 3 0 | 0 4 3653 Proc0 0 5 6 | 7 0 0 | 8 0 3654 9 0 10 | 11 0 0 | 12 0 3655 ------------------------------------- 3656 13 0 14 | 15 16 17 | 0 0 3657 Proc1 0 18 0 | 19 20 21 | 0 0 3658 0 0 0 | 22 23 0 | 24 0 3659 ------------------------------------- 3660 Proc2 25 26 27 | 0 0 28 | 29 0 3661 30 0 0 | 31 32 33 | 0 34 3662 .ve 3663 3664 This can be represented as a collection of submatrices as: 3665 3666 .vb 3667 A B C 3668 D E F 3669 G H I 3670 .ve 3671 3672 Here the submatrices A,B,C are owned by proc0, D,E,F are 3673 owned by proc1, and G,H,I are owned by proc2. 3674 3675 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3676 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3677 The 'M','N' parameters are 8,8, and have the same values on all procs. 3678 3679 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3680 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3681 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3682 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3683 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 3684 matrix and [DF] as another SeqAIJ matrix. 3685 3686 When d_nz, o_nz parameters are specified, d_nz storage elements are 3687 allocated for every row of the local diagonal submatrix, and o_nz 3688 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3689 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3690 row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 3691 In this case, the values of d_nz,o_nz are: 3692 .vb 3693 proc0 : d_nz = 2, o_nz = 2 3694 proc1 : d_nz = 3, o_nz = 2 3695 proc2 : d_nz = 1, o_nz = 4 3696 .ve 3697 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3698 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3699 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3700 34 values. 3701 3702 When d_nnz, o_nnz parameters are specified, the storage is specified 3703 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3704 In the above case the values for d_nnz,o_nnz are: 3705 .vb 3706 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3707 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3708 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3709 .ve 3710 Here the space allocated is the sum of all the above values, i.e., 34, and 3711 hence the preallocation is perfect.
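A minimal calling sketch for the example above, as it would look on proc0, is shown below (a sketch only: each process passes its own local sizes and nnz arrays, the MatSetValues() loop is elided, and ierr is assumed declared in the caller):
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};  /* proc0 values from the table above */

      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      /* ... MatSetValues() calls to fill the three locally owned rows ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve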
3712 3713 Level: intermediate 3714 3715 .keywords: matrix, aij, compressed row, sparse, parallel 3716 3717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3718 MPIAIJ, MatCreateMPIAIJWithArrays() 3719 @*/ 3720 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3721 { 3722 PetscErrorCode ierr; 3723 PetscMPIInt size; 3724 3725 PetscFunctionBegin; 3726 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3727 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3728 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3729 if (size > 1) { 3730 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3731 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3732 } else { 3733 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3734 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3735 } 3736 PetscFunctionReturn(0); 3737 } 3738 3739 #undef __FUNCT__ 3740 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3741 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3742 { 3743 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3744 3745 PetscFunctionBegin; 3746 if (Ad) *Ad = a->A; 3747 if (Ao) *Ao = a->B; 3748 if (colmap) *colmap = a->garray; 3749 PetscFunctionReturn(0); 3750 } 3751 3752 #undef __FUNCT__ 3753 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3754 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3755 { 3756 PetscErrorCode ierr; 3757 PetscInt i; 3758 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3759 3760 PetscFunctionBegin; 3761 if (coloring->ctype == IS_COLORING_GLOBAL) { 3762 ISColoringValue *allcolors,*colors; 3763 ISColoring ocoloring; 3764 3765 /* set coloring for diagonal portion */ 3766 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3767 3768 /* set coloring for off-diagonal portion */ 3769 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3770 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3771 for (i=0; i<a->B->cmap->n; i++) { 3772 colors[i] = allcolors[a->garray[i]]; 3773 } 3774 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3775 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3776 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3777 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3778 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3779 ISColoringValue *colors; 3780 PetscInt *larray; 3781 ISColoring ocoloring; 3782 3783 /* set coloring for diagonal portion */ 3784 ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr); 3785 for (i=0; i<a->A->cmap->n; i++) { 3786 larray[i] = i + A->cmap->rstart; 3787 } 3788 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3789 ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr); 3790 for (i=0; i<a->A->cmap->n; i++) { 3791 colors[i] = coloring->colors[larray[i]]; 3792 } 3793 ierr = PetscFree(larray);CHKERRQ(ierr); 3794 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3795 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3796 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3797 3798 /* set coloring for off-diagonal portion */ 3799 ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr); 3800 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3801 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3802 for (i=0; i<a->B->cmap->n; i++) { 3803 colors[i] = coloring->colors[larray[i]]; 3804 } 3805 ierr = PetscFree(larray);CHKERRQ(ierr); 3806 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3807 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3808 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3809 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 3810 PetscFunctionReturn(0); 3811 } 3812 3813 #undef __FUNCT__ 3814 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3815 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3816 { 3817 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3818 PetscErrorCode ierr; 3819 3820 PetscFunctionBegin; 3821 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3822 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3823 PetscFunctionReturn(0); 3824 } 3825 3826 #undef __FUNCT__ 3827 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3828 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3829 { 3830 PetscErrorCode ierr; 3831 PetscInt m,N,i,rstart,nnz,Ii; 3832 PetscInt *indx; 3833 PetscScalar *values; 3834 3835 PetscFunctionBegin; 3836 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3837 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3838 PetscInt *dnz,*onz,sum,bs,cbs; 3839 3840 if (n == PETSC_DECIDE) { 3841 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3842 } 3843 /* Check sum(n) = N */ 3844 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3845 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3846 3847 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3848 rstart -= m; 3849 3850 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3851 for (i=0; i<m; i++) { 3852 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3853 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3854 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3855 } 3856 3857 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3858 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3859 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3860 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3861 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3862 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3863 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3864 } 3865 3866 /* numeric phase */ 3867 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3868 for (i=0; i<m; i++) { 3869 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3870 Ii = i + rstart; 3871 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3872 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3873 } 3874 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3875 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3876 PetscFunctionReturn(0); 3877 } 3878 3879 #undef __FUNCT__ 3880 #define __FUNCT__ "MatFileSplit" 3881 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3882 { 3883 PetscErrorCode ierr; 3884
PetscMPIInt rank; 3885 PetscInt m,N,i,rstart,nnz; 3886 size_t len; 3887 const PetscInt *indx; 3888 PetscViewer out; 3889 char *name; 3890 Mat B; 3891 const PetscScalar *values; 3892 3893 PetscFunctionBegin; 3894 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3895 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3896 /* Should this be the type of the diagonal block of A? */ 3897 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3898 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3899 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3900 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3901 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3902 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3903 for (i=0; i<m; i++) { 3904 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3905 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3906 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3907 } 3908 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3909 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3910 3911 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3912 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3913 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3914 sprintf(name,"%s.%d",outfile,rank); 3915 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3916 ierr = PetscFree(name);CHKERRQ(ierr); 3917 ierr = MatView(B,out);CHKERRQ(ierr); 3918 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3919 ierr = MatDestroy(&B);CHKERRQ(ierr); 3920 PetscFunctionReturn(0); 3921 } 3922 3923 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3924 #undef __FUNCT__ 3925 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3926 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3927 { 3928 PetscErrorCode ierr; 3929 Mat_Merge_SeqsToMPI *merge; 3930 PetscContainer container; 3931 3932 PetscFunctionBegin; 3933 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3934 if (container) { 3935 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3936 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3937 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3938 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3939 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3940 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3941 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3942 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3943 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3944 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3945 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3946 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3947 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3948 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3949 ierr = PetscFree(merge);CHKERRQ(ierr); 3950 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3951 } 3952 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3953 PetscFunctionReturn(0); 3954 } 3955 3956 #include <../src/mat/utils/freespace.h> 3957 #include <petscbt.h> 3958 3959 #undef __FUNCT__ 3960 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3961 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3962 { 3963 PetscErrorCode ierr; 3964 MPI_Comm comm; 3965 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3966 PetscMPIInt size,rank,taga,*len_s; 3967 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3968 PetscInt proc,m; 3969 PetscInt **buf_ri,**buf_rj; 3970 PetscInt 
k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3971 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3972 MPI_Request *s_waits,*r_waits; 3973 MPI_Status *status; 3974 MatScalar *aa=a->a; 3975 MatScalar **abuf_r,*ba_i; 3976 Mat_Merge_SeqsToMPI *merge; 3977 PetscContainer container; 3978 3979 PetscFunctionBegin; 3980 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3981 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3982 3983 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3984 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3985 3986 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3987 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3988 3989 bi = merge->bi; 3990 bj = merge->bj; 3991 buf_ri = merge->buf_ri; 3992 buf_rj = merge->buf_rj; 3993 3994 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3995 owners = merge->rowmap->range; 3996 len_s = merge->len_s; 3997 3998 /* send and recv matrix values */ 3999 /*-----------------------------*/ 4000 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4001 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4002 4003 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4004 for (proc=0,k=0; proc<size; proc++) { 4005 if (!len_s[proc]) continue; 4006 i = owners[proc]; 4007 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4008 k++; 4009 } 4010 4011 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4012 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4013 ierr = PetscFree(status);CHKERRQ(ierr); 4014 4015 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4016 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4017 4018 /* insert mat values of mpimat */ 4019 /*----------------------------*/ 4020 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4021 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4022 4023 for (k=0; k<merge->nrecv; k++) { 4024 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4025 nrows = *(buf_ri_k[k]); 4026 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4027 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4028 } 4029 4030 /* set values of ba */ 4031 m = merge->rowmap->n; 4032 for (i=0; i<m; i++) { 4033 arow = owners[rank] + i; 4034 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4035 bnzi = bi[i+1] - bi[i]; 4036 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4037 4038 /* add local non-zero vals of this proc's seqmat into ba */ 4039 anzi = ai[arow+1] - ai[arow]; 4040 aj = a->j + ai[arow]; 4041 aa = a->a + ai[arow]; 4042 nextaj = 0; 4043 for (j=0; nextaj<anzi; j++) { 4044 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4045 ba_i[j] += aa[nextaj++]; 4046 } 4047 } 4048 4049 /* add received vals into ba */ 4050 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4051 /* i-th row */ 4052 if (i == *nextrow[k]) { 4053 anzi = *(nextai[k]+1) - *nextai[k]; 4054 aj = buf_rj[k] + *(nextai[k]); 4055 aa = abuf_r[k] + *(nextai[k]); 4056 nextaj = 0; 4057 for (j=0; nextaj<anzi; j++) { 4058 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4059 ba_i[j] += aa[nextaj++]; 4060 } 4061 } 4062 nextrow[k]++; nextai[k]++; 4063 } 4064 } 4065 ierr =
MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4066 } 4067 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4068 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4069 4070 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4071 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4072 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4073 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4074 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4075 PetscFunctionReturn(0); 4076 } 4077 4078 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4079 4080 #undef __FUNCT__ 4081 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4082 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4083 { 4084 PetscErrorCode ierr; 4085 Mat B_mpi; 4086 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4087 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4088 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4089 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4090 PetscInt len,proc,*dnz,*onz,bs,cbs; 4091 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4092 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4093 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4094 MPI_Status *status; 4095 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4096 PetscBT lnkbt; 4097 Mat_Merge_SeqsToMPI *merge; 4098 PetscContainer container; 4099 4100 PetscFunctionBegin; 4101 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4102 4103 /* make sure it is a PETSc comm */ 4104 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4105 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4106 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4107 4108 ierr = PetscNew(&merge);CHKERRQ(ierr); 4109 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4110 4111 /* determine row ownership */ 4112 /*---------------------------------------------------------*/ 4113 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4114 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4115 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4116 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4117 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4118 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4119 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4120 4121 m = merge->rowmap->n; 4122 owners = merge->rowmap->range; 4123 4124 /* determine the number of messages to send, their lengths */ 4125 /*---------------------------------------------------------*/ 4126 len_s = merge->len_s; 4127 4128 len = 0; /* length of buf_si[] */ 4129 merge->nsend = 0; 4130 for (proc=0; proc<size; proc++) { 4131 len_si[proc] = 0; 4132 if (proc == rank) { 4133 len_s[proc] = 0; 4134 } else { 4135 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4136 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */ 4137 } 4138 if (len_s[proc]) { 4139 merge->nsend++; 4140 nrows = 0; 4141 for (i=owners[proc]; i<owners[proc+1]; i++) { 4142 if (ai[i+1] > ai[i]) nrows++; 4143 } 4144 len_si[proc] = 2*(nrows+1); 4145 len += len_si[proc]; 4146 } 4147 } 4148 4149 /* determine the number and length of messages to receive for ij-structure */ 4150 /*-------------------------------------------------------------------------*/ 4151 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4152 ierr =
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4153 4154 /* post the Irecv of j-structure */ 4155 /*-------------------------------*/ 4156 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4157 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4158 4159 /* post the Isend of j-structure */ 4160 /*--------------------------------*/ 4161 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4162 4163 for (proc=0, k=0; proc<size; proc++) { 4164 if (!len_s[proc]) continue; 4165 i = owners[proc]; 4166 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4167 k++; 4168 } 4169 4170 /* receives and sends of j-structure are complete */ 4171 /*------------------------------------------------*/ 4172 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4173 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4174 4175 /* send and recv i-structure */ 4176 /*---------------------------*/ 4177 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4178 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4179 4180 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4181 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4182 for (proc=0,k=0; proc<size; proc++) { 4183 if (!len_s[proc]) continue; 4184 /* form outgoing message for i-structure: 4185 buf_si[0]: nrows to be sent 4186 [1:nrows]: row index (local to the receiving process) 4187 [nrows+1:2*nrows+1]: i-structure index 4188 */ 4189 /*-------------------------------------------*/ 4190 nrows = len_si[proc]/2 - 1; 4191 buf_si_i = buf_si + nrows+1; 4192 buf_si[0] = nrows; 4193 buf_si_i[0] = 0; 4194 nrows = 0; 4195 for (i=owners[proc]; i<owners[proc+1]; i++) { 4196 anzi = ai[i+1] - ai[i]; 4197 if (anzi) { 4198 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4199 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4200 nrows++; 4201 } 4202 } 4203 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4204 k++; 4205 buf_si += len_si[proc]; 4206 } 4207 4208 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4209 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4210 4211 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4212 for (i=0; i<merge->nrecv; i++) { 4213 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4214 } 4215 4216 ierr = PetscFree(len_si);CHKERRQ(ierr); 4217 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4218 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4219 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4220 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4221 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4222 ierr = PetscFree(status);CHKERRQ(ierr); 4223 4224 /* compute a local seq matrix in each processor */ 4225 /*----------------------------------------------*/ 4226 /* allocate bi array and free space for accumulating nonzero column info */ 4227 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4228 bi[0] = 0; 4229 4230 /* create and initialize a linked list */ 4231 nlnk = N+1; 4232 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4233 4234 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4235 len = ai[owners[rank+1]] -
ai[owners[rank]]; 4236 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4237 4238 current_space = free_space; 4239 4240 /* determine symbolic info for each local row */ 4241 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4242 4243 for (k=0; k<merge->nrecv; k++) { 4244 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4245 nrows = *buf_ri_k[k]; 4246 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4247 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4248 } 4249 4250 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4251 len = 0; 4252 for (i=0; i<m; i++) { 4253 bnzi = 0; 4254 /* add local non-zero cols of this proc's seqmat into lnk */ 4255 arow = owners[rank] + i; 4256 anzi = ai[arow+1] - ai[arow]; 4257 aj = a->j + ai[arow]; 4258 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4259 bnzi += nlnk; 4260 /* add received col data into lnk */ 4261 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4262 if (i == *nextrow[k]) { /* i-th row */ 4263 anzi = *(nextai[k]+1) - *nextai[k]; 4264 aj = buf_rj[k] + *nextai[k]; 4265 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4266 bnzi += nlnk; 4267 nextrow[k]++; nextai[k]++; 4268 } 4269 } 4270 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4271 4272 /* if free space is not available, make more free space */ 4273 if (current_space->local_remaining<bnzi) { 4274 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4275 nspacedouble++; 4276 } 4277 /* copy data into free space, then initialize lnk */ 4278 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4279 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4280 4281 current_space->array += bnzi; 4282 current_space->local_used += bnzi; 4283 current_space->local_remaining -= bnzi; 4284 4285 bi[i+1] = bi[i] + bnzi; 4286 } 4287 4288 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4289 4290 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4291 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4292 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4293 4294 /* create symbolic parallel matrix B_mpi */ 4295 /*---------------------------------------*/ 4296 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4297 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4298 if (n==PETSC_DECIDE) { 4299 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4300 } else { 4301 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4302 } 4303 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4304 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4305 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4306 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4307 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4308 4309 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4310 B_mpi->assembled = PETSC_FALSE; 4311 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4312 merge->bi = bi; 4313 merge->bj = bj; 4314 merge->buf_ri = buf_ri; 4315 merge->buf_rj = buf_rj; 4316 merge->coi = NULL; 4317 merge->coj = NULL; 4318 merge->owners_co = NULL; 4319 4320 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4321 4322 /* attach the supporting struct to B_mpi for reuse
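by MatCreateMPIAIJSumSeqAIJNumeric(), which retrieves it again with PetscObjectQuery()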
*/ 4323 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4324 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4325 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4326 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4327 *mpimat = B_mpi; 4328 4329 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4330 PetscFunctionReturn(0); 4331 } 4332 4333 #undef __FUNCT__ 4334 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4335 /*@C 4336 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together sequential 4337 matrices from each processor 4338 4339 Collective on MPI_Comm 4340 4341 Input Parameters: 4342 + comm - the communicator the parallel matrix will live on 4343 . seqmat - the input sequential matrix on each process 4344 . m - number of local rows (or PETSC_DECIDE) 4345 . n - number of local columns (or PETSC_DECIDE) 4346 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4347 4348 Output Parameter: 4349 . mpimat - the parallel matrix generated 4350 4351 Level: advanced 4352 4353 Notes: 4354 The dimensions of the sequential matrix in each processor MUST be the same. 4355 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4356 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4357 @*/ 4358 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4359 { 4360 PetscErrorCode ierr; 4361 PetscMPIInt size; 4362 4363 PetscFunctionBegin; 4364 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4365 if (size == 1) { 4366 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4367 if (scall == MAT_INITIAL_MATRIX) { 4368 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4369 } else { 4370 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4371 } 4372 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4373 PetscFunctionReturn(0); 4374 } 4375 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4376 if (scall == MAT_INITIAL_MATRIX) { 4377 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4378 } 4379 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4380 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4381 PetscFunctionReturn(0); 4382 } 4383 4384 #undef __FUNCT__ 4385 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4386 /*@ 4387 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4388 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4389 with MatGetSize() 4390 4391 Not Collective 4392 4393 Input Parameters: 4394 + A - the matrix 4395 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4396 4397 Output Parameter: 4398 .

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a    = (Mat_SeqAIJ*)(mpimat->A)->data;
  b    = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai   = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa   = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
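
/*
   Example usage (a hedged sketch): pull this process's rows of a parallel matrix A
   into one sequential matrix; Aloc is a hypothetical name.

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ... use the mlocal-by-N SeqAIJ matrix Aloc ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);   (refresh the values after A changes)
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/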

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
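
/*
   Example usage (a hedged sketch; Aloc is a hypothetical name): with NULL row and col
   index sets, all local rows but only the columns containing nonzeros are extracted,
   so Aloc is typically much narrower than the matrix from MatMPIAIJGetLocalMat().

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/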

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the
     nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
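
/*
   Example usage (a hedged sketch; rowb, colb and Bseq are hypothetical names): gather
   the rows of B matching the nonzero columns of the local part of A, e.g. as a building
   block of a parallel matrix-matrix product. The index sets filled by the first call
   are passed back in for reuse.

     IS  rowb,colb;
     Mat Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ... later, after the values of B change ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
*/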

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
   MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the
   nonzero columns of the OFF-DIAGONAL portion of local A

   Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx =a->Mvctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices; /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
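  /*
     Explanatory note (added commentary): the exchange below runs in three phases over
     the send/receive pattern of the existing Mvctx scatter. First the row lengths
     (i-array) of the needed rows of B are communicated, then the column indices
     (j-array), and finally the numerical values (a-array). For MAT_REUSE_MATRIX the
     i- and j-structures are taken from the saved startsj_s/startsj_r/bufa_ptr arrays
     and only the a-array phase is repeated.
  */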
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value");
  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.    A - The matrix in mpiaij format

   Output Parameters:
+    lvec - The local vector holding off-process values from the argument to a matrix-vector product
.    colmap - A map from global column index to local index into lvec
-    multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
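
/*
   Example usage (a hedged sketch): inspect the scatter that MatMult() uses for an
   MPIAIJ matrix A. All three output pointers must be valid addresses.

     Vec        lvec;
     VecScatter ctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&ctx);CHKERRQ(ierr);
*/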

PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

           n          p           p
        (     )    (     )     (     )
      m (  A  ) * n (  B  ) = m (  C  )
        (     )    (     )     (     )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
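
/*
   Example usage (a hedged sketch): the routines above are reached through the generic
   interface once A is MATMPIDENSE and B is MATMPIAIJ; C comes back as MATMPIDENSE.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/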

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/
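
/*
   Example (a hedged sketch): selecting this type explicitly and preallocating; m, n,
   dnz and onz are hypothetical placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);
*/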

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
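
/*
   Note (added commentary): the PetscObjectComposeFunction() calls above are what make
   run-time dispatch work for this type; for example, a conversion such as

     ierr = MatConvert(A,MATMPISBAIJ,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);

   resolves to MatConvert_MPIAIJ_MPISBAIJ() through the composed
   "MatConvert_mpiaij_mpisbaij_C" entry (S is a hypothetical name).
*/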

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
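
/*
   Example usage (a hedged sketch, one 1-by-1 diagonal block and an empty off-diagonal
   block per process; all names are hypothetical, and the arrays must outlive the matrix):

     PetscInt    di[] = {0,1},dj[] = {0};
     PetscScalar da[] = {2.0};
     PetscInt    oi[] = {0,0},*oj = NULL;
     PetscScalar *oa  = NULL;
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);   (the arrays are NOT freed; see the Notes above)
*/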
/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}