1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petsc-private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 8 /*MC 9 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 10 11 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 12 and MATMPIAIJ otherwise. As a result, for single process communicators, 13 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 14 for communicators controlling multiple processes. It is recommended that you call both of 15 the above preallocation routines for simplicity. 16 17 Options Database Keys: 18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 19 20 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 21 enough exist. 22 23 Level: beginner 24 25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 26 M*/ 27 28 /*MC 29 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 30 31 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 32 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 33 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 34 for communicators controlling multiple processes. It is recommended that you call both of 35 the above preallocation routines for simplicity. 36 37 Options Database Keys: 38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 39 40 Level: beginner 41 42 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 43 M*/ 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 52 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 53 const PetscInt *ia,*ib; 54 const MatScalar *aa,*bb; 55 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 56 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 57 58 PetscFunctionBegin; 59 *keptrows = 0; 60 ia = a->i; 61 ib = b->i; 62 for (i=0; i<m; i++) { 63 na = ia[i+1] - ia[i]; 64 nb = ib[i+1] - ib[i]; 65 if (!na && !nb) { 66 cnt++; 67 goto ok1; 68 } 69 aa = a->a + ia[i]; 70 for (j=0; j<na; j++) { 71 if (aa[j] != 0.0) goto ok1; 72 } 73 bb = b->a + ib[i]; 74 for (j=0; j <nb; j++) { 75 if (bb[j] != 0.0) goto ok1; 76 } 77 cnt++; 78 ok1:; 79 } 80 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 81 if (!n0rows) PetscFunctionReturn(0); 82 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 83 cnt = 0; 84 for (i=0; i<m; i++) { 85 na = ia[i+1] - ia[i]; 86 nb = ib[i+1] - ib[i]; 87 if (!na && !nb) continue; 88 aa = a->a + ia[i]; 89 for (j=0; j<na;j++) { 90 if (aa[j] != 0.0) { 91 rows[cnt++] = rstart + i; 92 goto ok2; 93 } 94 } 95 bb = b->a + ib[i]; 96 for (j=0; j<nb; j++) { 97 if (bb[j] != 0.0) { 98 rows[cnt++] = rstart + i; 99 goto ok2; 100 } 101 } 102 ok2:; 103 } 104 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ" 110 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 
111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatDistribute_MPIAIJ" 193 /* 194 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 195 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
196 197 Only for square matrices 198 199 Used by a preconditioner, hence PETSC_EXTERN 200 */ 201 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 202 { 203 PetscMPIInt rank,size; 204 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 205 PetscErrorCode ierr; 206 Mat mat; 207 Mat_SeqAIJ *gmata; 208 PetscMPIInt tag; 209 MPI_Status status; 210 PetscBool aij; 211 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 212 213 PetscFunctionBegin; 214 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 215 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 216 if (!rank) { 217 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 218 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 219 } 220 if (reuse == MAT_INITIAL_MATRIX) { 221 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 222 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 223 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 224 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 225 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 226 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 227 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 228 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 229 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 230 231 rowners[0] = 0; 232 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 233 rstart = rowners[rank]; 234 rend = rowners[rank+1]; 235 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 236 if (!rank) { 237 gmata = (Mat_SeqAIJ*) gmat->data; 238 /* send row lengths to all processors */ 239 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 240 for (i=1; i<size; i++) { 241 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 242 } 243 /* determine number diagonal and off-diagonal counts */ 244 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 245 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 246 jj = 0; 247 for (i=0; i<m; i++) { 248 for (j=0; j<dlens[i]; j++) { 249 if (gmata->j[jj] < rstart) ld[i]++; 250 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 251 jj++; 252 } 253 } 254 /* send column indices to other processes */ 255 for (i=1; i<size; i++) { 256 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 257 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 258 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 259 } 260 261 /* send numerical values to other processes */ 262 for (i=1; i<size; i++) { 263 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 264 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 265 } 266 gmataa = gmata->a; 267 gmataj = gmata->j; 268 269 } else { 270 /* receive row lengths */ 271 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 272 /* receive column indices */ 273 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 274 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 275 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 276 /* determine number diagonal and off-diagonal counts */ 277 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 278 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 279 jj = 0; 280 for (i=0; i<m; i++) { 281 for (j=0; j<dlens[i]; j++) { 282 if 
(gmataj[jj] < rstart) ld[i]++; 283 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 284 jj++; 285 } 286 } 287 /* receive numerical values */ 288 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 289 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 290 } 291 /* set preallocation */ 292 for (i=0; i<m; i++) { 293 dlens[i] -= olens[i]; 294 } 295 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 296 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 297 298 for (i=0; i<m; i++) { 299 dlens[i] += olens[i]; 300 } 301 cnt = 0; 302 for (i=0; i<m; i++) { 303 row = rstart + i; 304 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 305 cnt += dlens[i]; 306 } 307 if (rank) { 308 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 309 } 310 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 311 ierr = PetscFree(rowners);CHKERRQ(ierr); 312 313 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 314 315 *inmat = mat; 316 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 317 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 318 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 319 mat = *inmat; 320 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 321 if (!rank) { 322 /* send numerical values to other processes */ 323 gmata = (Mat_SeqAIJ*) gmat->data; 324 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 325 gmataa = gmata->a; 326 for (i=1; i<size; i++) { 327 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 328 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 329 } 330 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 331 } else { 332 /* receive numerical values from process 0*/ 333 nz = Ad->nz + Ao->nz; 334 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 335 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 336 } 337 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 338 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 339 ad = Ad->a; 340 ao = Ao->a; 341 if (mat->rmap->n) { 342 i = 0; 343 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 344 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 345 } 346 for (i=1; i<mat->rmap->n; i++) { 347 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 348 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 349 } 350 i--; 351 if (mat->rmap->n) { 352 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 353 } 354 if (rank) { 355 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 356 } 357 } 358 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 359 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 363 /* 364 Local utility routine that creates a mapping from the global column 365 number to the local number in the off-diagonal part of the local 366 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 367 a slightly higher hash table cost; without it it is not scalable (each processor 368 has an order N integer array but is fast to acess. 
369 */ 370 #undef __FUNCT__ 371 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 372 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 373 { 374 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 375 PetscErrorCode ierr; 376 PetscInt n = aij->B->cmap->n,i; 377 378 PetscFunctionBegin; 379 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 380 #if defined(PETSC_USE_CTABLE) 381 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 382 for (i=0; i<n; i++) { 383 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 384 } 385 #else 386 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 387 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 388 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 389 #endif 390 PetscFunctionReturn(0); 391 } 392 393 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 394 { \ 395 if (col <= lastcol1) low1 = 0; \ 396 else high1 = nrow1; \ 397 lastcol1 = col;\ 398 while (high1-low1 > 5) { \ 399 t = (low1+high1)/2; \ 400 if (rp1[t] > col) high1 = t; \ 401 else low1 = t; \ 402 } \ 403 for (_i=low1; _i<high1; _i++) { \ 404 if (rp1[_i] > col) break; \ 405 if (rp1[_i] == col) { \ 406 if (addv == ADD_VALUES) ap1[_i] += value; \ 407 else ap1[_i] = value; \ 408 goto a_noinsert; \ 409 } \ 410 } \ 411 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 412 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 413 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 414 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 415 N = nrow1++ - 1; a->nz++; high1++; \ 416 /* shift up all the later entries in this row */ \ 417 for (ii=N; ii>=_i; ii--) { \ 418 rp1[ii+1] = rp1[ii]; \ 419 ap1[ii+1] = ap1[ii]; \ 420 } \ 421 rp1[_i] = col; \ 422 ap1[_i] = value; \ 423 A->nonzerostate++;\ 424 a_noinsert: ; \ 425 ailen[row] = nrow1; \ 426 } 427 428 429 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 430 { \ 431 if (col <= lastcol2) low2 = 0; \ 432 else high2 = nrow2; \ 433 lastcol2 = col; \ 434 while (high2-low2 > 5) { \ 435 t = (low2+high2)/2; \ 436 if (rp2[t] > col) high2 = t; \ 437 else low2 = t; \ 438 } \ 439 for (_i=low2; _i<high2; _i++) { \ 440 if (rp2[_i] > col) break; \ 441 if (rp2[_i] == col) { \ 442 if (addv == ADD_VALUES) ap2[_i] += value; \ 443 else ap2[_i] = value; \ 444 goto b_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 448 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 450 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 451 N = nrow2++ - 1; b->nz++; high2++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp2[ii+1] = rp2[ii]; \ 455 ap2[ii+1] = ap2[ii]; \ 456 } \ 457 rp2[_i] = col; \ 458 ap2[_i] = value; \ 459 B->nonzerostate++; \ 460 b_noinsert: ; \ 461 bilen[row] = nrow2; \ 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 466 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 467 { 468 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 469 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
(Mat_SeqAIJ*)mat->B->data; 470 PetscErrorCode ierr; 471 PetscInt l,*garray = mat->garray,diag; 472 473 PetscFunctionBegin; 474 /* code only works for square matrices A */ 475 476 /* find size of row to the left of the diagonal part */ 477 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 478 row = row - diag; 479 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 480 if (garray[b->j[b->i[row]+l]] > diag) break; 481 } 482 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 483 484 /* diagonal part */ 485 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 486 487 /* right of diagonal part */ 488 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 489 PetscFunctionReturn(0); 490 } 491 492 #undef __FUNCT__ 493 #define __FUNCT__ "MatSetValues_MPIAIJ" 494 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 495 { 496 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 497 PetscScalar value; 498 PetscErrorCode ierr; 499 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 500 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 501 PetscBool roworiented = aij->roworiented; 502 503 /* Some Variables required in the macro */ 504 Mat A = aij->A; 505 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 506 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 507 MatScalar *aa = a->a; 508 PetscBool ignorezeroentries = a->ignorezeroentries; 509 Mat B = aij->B; 510 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 511 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 512 MatScalar *ba = b->a; 513 514 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 515 PetscInt nonew; 516 MatScalar *ap1,*ap2; 517 518 PetscFunctionBegin; 519 for (i=0; i<m; i++) { 520 if (im[i] < 0) continue; 521 #if defined(PETSC_USE_DEBUG) 522 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 523 #endif 524 if (im[i] >= rstart && im[i] < rend) { 525 row = im[i] - rstart; 526 lastcol1 = -1; 527 rp1 = aj + ai[row]; 528 ap1 = aa + ai[row]; 529 rmax1 = aimax[row]; 530 nrow1 = ailen[row]; 531 low1 = 0; 532 high1 = nrow1; 533 lastcol2 = -1; 534 rp2 = bj + bi[row]; 535 ap2 = ba + bi[row]; 536 rmax2 = bimax[row]; 537 nrow2 = bilen[row]; 538 low2 = 0; 539 high2 = nrow2; 540 541 for (j=0; j<n; j++) { 542 if (roworiented) value = v[i*n+j]; 543 else value = v[i+j*m]; 544 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 545 if (in[j] >= cstart && in[j] < cend) { 546 col = in[j] - cstart; 547 nonew = a->nonew; 548 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 549 } else if (in[j] < 0) continue; 550 #if defined(PETSC_USE_DEBUG) 551 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 552 #endif 553 else { 554 if (mat->was_assembled) { 555 if (!aij->colmap) { 556 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 557 } 558 #if defined(PETSC_USE_CTABLE) 559 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 560 col--; 561 #else 562 col = aij->colmap[in[j]] - 1; 563 #endif 564 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 565 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 566 col = in[j]; 567 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 568 B = aij->B; 569 b = (Mat_SeqAIJ*)B->data; 570 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 571 rp2 = bj + bi[row]; 572 ap2 = ba + bi[row]; 573 rmax2 = bimax[row]; 574 nrow2 = bilen[row]; 575 low2 = 0; 576 high2 = nrow2; 577 bm = aij->B->rmap->n; 578 ba = b->a; 579 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 580 } else col = in[j]; 581 nonew = b->nonew; 582 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 583 } 584 } 585 } else { 586 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 587 if (!aij->donotstash) { 588 mat->assembled = PETSC_FALSE; 589 if (roworiented) { 590 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 591 } else { 592 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 593 } 594 } 595 } 596 } 597 PetscFunctionReturn(0); 598 } 599 600 #undef __FUNCT__ 601 #define __FUNCT__ "MatGetValues_MPIAIJ" 602 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 603 { 604 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 605 PetscErrorCode ierr; 606 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 607 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 608 609 PetscFunctionBegin; 610 for (i=0; i<m; i++) { 611 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 612 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 613 if (idxm[i] >= rstart && idxm[i] < rend) { 614 row = idxm[i] - rstart; 615 for (j=0; j<n; j++) { 616 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 617 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 618 if (idxn[j] >= cstart && idxn[j] < cend) { 619 col = idxn[j] - cstart; 620 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 621 } else { 622 if (!aij->colmap) { 623 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 624 } 625 #if defined(PETSC_USE_CTABLE) 626 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 627 col--; 628 #else 629 col = aij->colmap[idxn[j]] - 1; 630 #endif 631 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 632 else { 633 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 634 } 635 } 636 } 637 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 638 } 639 PetscFunctionReturn(0); 640 } 641 642 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 643 644 #undef __FUNCT__ 645 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 646 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 647 { 648 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 649 PetscErrorCode ierr; 650 PetscInt nstash,reallocs; 651 InsertMode addv; 652 653 PetscFunctionBegin; 654 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 655 656 /* make sure all processors are either in INSERTMODE or 
ADDMODE */ 657 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 658 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 659 mat->insertmode = addv; /* in case this processor had no cache */ 660 661 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 662 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 663 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 669 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 670 { 671 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 672 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 673 PetscErrorCode ierr; 674 PetscMPIInt n; 675 PetscInt i,j,rstart,ncols,flg; 676 PetscInt *row,*col; 677 PetscBool other_disassembled; 678 PetscScalar *val; 679 InsertMode addv = mat->insertmode; 680 681 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 682 683 PetscFunctionBegin; 684 if (!aij->donotstash && !mat->nooffprocentries) { 685 while (1) { 686 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 687 if (!flg) break; 688 689 for (i=0; i<n; ) { 690 /* Now identify the consecutive vals belonging to the same row */ 691 for (j=i,rstart=row[j]; j<n; j++) { 692 if (row[j] != rstart) break; 693 } 694 if (j < n) ncols = j-i; 695 else ncols = n-i; 696 /* Now assemble all these values with a single function call */ 697 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 698 699 i = j; 700 } 701 } 702 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 703 } 704 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 705 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 706 707 /* determine if any processor has disassembled, if so we must 708 also disassemble ourselfs, in order that we may reassemble. 
*/ 709 /* 710 if nonzero structure of submatrix B cannot change then we know that 711 no processor disassembled thus we can skip this stuff 712 */ 713 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 714 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 715 if (mat->was_assembled && !other_disassembled) { 716 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 717 } 718 } 719 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 720 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 721 } 722 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 723 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 724 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 725 726 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 727 728 aij->rowvalues = 0; 729 730 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 731 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 732 733 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 734 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 735 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 736 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 737 } 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 743 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 744 { 745 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 746 PetscErrorCode ierr; 747 748 PetscFunctionBegin; 749 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 750 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatZeroRows_MPIAIJ" 756 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 757 { 758 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 759 PetscInt *owners = A->rmap->range; 760 PetscInt n = A->rmap->n; 761 PetscSF sf; 762 PetscInt *lrows; 763 PetscSFNode *rrows; 764 PetscInt r, p = 0, len = 0; 765 PetscErrorCode ierr; 766 767 PetscFunctionBegin; 768 /* Create SF where leaves are input rows and roots are owned rows */ 769 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 770 for (r = 0; r < n; ++r) lrows[r] = -1; 771 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 772 for (r = 0; r < N; ++r) { 773 const PetscInt idx = rows[r]; 774 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 775 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 776 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 777 } 778 if (A->nooffproczerorows) { 779 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 780 lrows[len++] = idx - owners[p]; 781 } else { 782 rrows[r].rank = p; 783 rrows[r].index = rows[r] - owners[p]; 784 } 785 } 786 if (!A->nooffproczerorows) { 787 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 788 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 789 /* Collect flags for rows to be zeroed */ 790 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 791 ierr = 
PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 792 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 793 /* Compress and put in row numbers */ 794 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 795 } 796 /* fix right hand side if needed */ 797 if (x && b) { 798 const PetscScalar *xx; 799 PetscScalar *bb; 800 801 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 802 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 803 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 804 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 805 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 806 } 807 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 808 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 810 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 811 } else if (diag != 0.0) { 812 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 813 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 814 for (r = 0; r < len; ++r) { 815 const PetscInt row = lrows[r] + A->rmap->rstart; 816 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 817 } 818 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 819 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 820 } else { 821 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 822 } 823 ierr = PetscFree(lrows);CHKERRQ(ierr); 824 825 /* only change matrix nonzero state if pattern was allowed to be changed */ 826 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 827 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 828 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 829 } 830 PetscFunctionReturn(0); 831 } 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 835 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 836 { 837 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 838 PetscErrorCode ierr; 839 PetscMPIInt n = A->rmap->n; 840 PetscInt i,j,r,m,p = 0,len = 0; 841 PetscInt *lrows,*owners = A->rmap->range; 842 PetscSFNode *rrows; 843 PetscSF sf; 844 const PetscScalar *xx; 845 PetscScalar *bb,*mask; 846 Vec xmask,lmask; 847 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 848 const PetscInt *aj, *ii,*ridx; 849 PetscScalar *aa; 850 851 PetscFunctionBegin; 852 /* Create SF where leaves are input rows and roots are owned rows */ 853 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 854 for (r = 0; r < n; ++r) lrows[r] = -1; 855 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 856 for (r = 0; r < N; ++r) { 857 const PetscInt idx = rows[r]; 858 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 859 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 860 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 861 } 862 rrows[r].rank = p; 863 rrows[r].index = rows[r] - owners[p]; 864 } 865 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 866 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, 
PETSC_OWN_POINTER);CHKERRQ(ierr); 867 /* Collect flags for rows to be zeroed */ 868 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 869 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 870 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 871 /* Compress and put in row numbers */ 872 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 873 /* zero diagonal part of matrix */ 874 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 875 /* handle off diagonal part of matrix */ 876 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 877 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 878 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 879 for (i=0; i<len; i++) bb[lrows[i]] = 1; 880 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 881 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 883 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 884 if (x) { 885 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 888 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 889 } 890 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 891 /* remove zeroed rows of off diagonal matrix */ 892 ii = aij->i; 893 for (i=0; i<len; i++) { 894 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 895 } 896 /* loop over all elements of off process part of matrix zeroing removed columns*/ 897 if (aij->compressedrow.use) { 898 m = aij->compressedrow.nrows; 899 ii = aij->compressedrow.i; 900 ridx = aij->compressedrow.rindex; 901 for (i=0; i<m; i++) { 902 n = ii[i+1] - ii[i]; 903 aj = aij->j + ii[i]; 904 aa = aij->a + ii[i]; 905 906 for (j=0; j<n; j++) { 907 if (PetscAbsScalar(mask[*aj])) { 908 if (b) bb[*ridx] -= *aa*xx[*aj]; 909 *aa = 0.0; 910 } 911 aa++; 912 aj++; 913 } 914 ridx++; 915 } 916 } else { /* do not use compressed row format */ 917 m = l->B->rmap->n; 918 for (i=0; i<m; i++) { 919 n = ii[i+1] - ii[i]; 920 aj = aij->j + ii[i]; 921 aa = aij->a + ii[i]; 922 for (j=0; j<n; j++) { 923 if (PetscAbsScalar(mask[*aj])) { 924 if (b) bb[i] -= *aa*xx[*aj]; 925 *aa = 0.0; 926 } 927 aa++; 928 aj++; 929 } 930 } 931 } 932 if (x) { 933 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 934 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 935 } 936 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 937 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 938 ierr = PetscFree(lrows);CHKERRQ(ierr); 939 940 /* only change matrix nonzero state if pattern was allowed to be changed */ 941 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 942 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 943 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 944 } 945 PetscFunctionReturn(0); 946 } 947 948 #undef __FUNCT__ 949 #define __FUNCT__ "MatMult_MPIAIJ" 950 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 951 { 952 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 953 PetscErrorCode ierr; 954 PetscInt nt; 955 956 PetscFunctionBegin; 957 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 958 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 959 ierr = 
VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 960 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 961 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 #undef __FUNCT__ 967 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 968 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 969 { 970 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 971 PetscErrorCode ierr; 972 973 PetscFunctionBegin; 974 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatMultAdd_MPIAIJ" 980 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 981 { 982 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 988 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 996 { 997 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 998 PetscErrorCode ierr; 999 PetscBool merged; 1000 1001 PetscFunctionBegin; 1002 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1003 /* do nondiagonal part */ 1004 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1005 if (!merged) { 1006 /* send it on its way */ 1007 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1008 /* do local part */ 1009 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1010 /* receive remote parts: note this assumes the values are not actually */ 1011 /* added in yy until the next line, */ 1012 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1013 } else { 1014 /* do local part */ 1015 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1016 /* send it on its way */ 1017 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1018 /* values actually were received in the Begin() but we need to call this nop */ 1019 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1026 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1027 { 1028 MPI_Comm comm; 1029 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1030 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1031 IS Me,Notme; 1032 PetscErrorCode ierr; 1033 PetscInt M,N,first,last,*notme,i; 1034 PetscMPIInt size; 1035 1036 PetscFunctionBegin; 1037 /* Easy test: symmetric diagonal block */ 1038 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1039 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1040 if (!*f) PetscFunctionReturn(0); 1041 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1042 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1043 if (size == 1) PetscFunctionReturn(0); 1044 1045 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1046 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1047 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1048 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1049 for (i=0; i<first; i++) notme[i] = i; 1050 for (i=last; i<M; i++) notme[i-last+first] = i; 1051 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1052 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1053 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1054 Aoff = Aoffs[0]; 1055 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1056 Boff = Boffs[0]; 1057 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1058 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1059 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1060 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1061 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1062 ierr = PetscFree(notme);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1068 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1071 PetscErrorCode ierr; 1072 1073 PetscFunctionBegin; 1074 /* do nondiagonal part */ 1075 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1076 /* send it on its way */ 1077 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1078 /* do local part */ 1079 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1080 /* receive remote parts */ 1081 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 /* 1086 This only works correctly for square matrices where the subblock A->A is the 1087 diagonal block 1088 */ 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1091 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1092 { 1093 PetscErrorCode ierr; 1094 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1095 1096 PetscFunctionBegin; 1097 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1098 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1099 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatScale_MPIAIJ" 1105 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1112 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatDestroy_MPIAIJ" 1118 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1119 { 1120 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1121 PetscErrorCode ierr; 1122 1123 PetscFunctionBegin; 1124 #if defined(PETSC_USE_LOG) 1125 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1126 #endif 1127 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1128 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1129 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1130 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1131 #if defined(PETSC_USE_CTABLE) 1132 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1133 #else 1134 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1135 #endif 1136 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1137 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1138 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1139 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1140 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1141 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1142 1143 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1145 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1146 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1152 #if defined(PETSC_HAVE_ELEMENTAL) 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1154 #endif 1155 PetscFunctionReturn(0); 1156 } 1157 1158 #undef __FUNCT__ 1159 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1160 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1161 { 1162 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1163 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1164 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1165 PetscErrorCode ierr; 1166 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1167 int fd; 1168 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1169 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1170 PetscScalar *column_values; 1171 PetscInt message_count,flowcontrolcount; 1172 FILE *file; 1173 1174 PetscFunctionBegin; 1175 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1176 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1177 nz = A->nz + B->nz; 1178 if (!rank) { 1179 header[0] = MAT_FILE_CLASSID; 1180 header[1] = mat->rmap->N; 1181 header[2] = mat->cmap->N; 1182 1183 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1184 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1185 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1186 /* get largest number of rows any processor has */ 1187 rlen = mat->rmap->n; 1188 range = mat->rmap->range; 1189 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1190 } else { 1191 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1192 rlen = mat->rmap->n; 1193 } 1194 1195 /* load up the local row counts */ 1196 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1197 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1198 1199 /* store the row lengths to the file */ 1200 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1201 if (!rank) { 1202 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1203 for (i=1; i<size; i++) { 1204 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1205 rlen = range[i+1] - range[i]; 1206 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1207 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1208 } 1209 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1210 } else { 1211 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1212 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1213 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1214 } 1215 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1216 1217 /* load up the local column indices */ 1218 nzmax = nz; /* th processor needs space a largest processor needs */ 1219 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1220 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1221 cnt = 0; 1222 for (i=0; i<mat->rmap->n; i++) { 1223 for (j=B->i[i]; j<B->i[i+1]; j++) { 1224 if ((col = garray[B->j[j]]) > cstart) break; 1225 column_indices[cnt++] = col; 1226 } 1227 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1228 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1229 } 1230 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1231 1232 /* store the column indices to the file */ 1233 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1234 if (!rank) { 1235 MPI_Status status; 1236 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1237 for (i=1; i<size; i++) { 1238 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1239 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1240 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1241 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1242 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1243 } 1244 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1245 } else { 1246 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1247 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1248 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1249 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1250 } 1251 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1252 1253 /* load up the local column values */ 1254 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1255 cnt = 0; 1256 for (i=0; i<mat->rmap->n; i++) { 1257 for (j=B->i[i]; j<B->i[i+1]; j++) { 1258 if (garray[B->j[j]] > cstart) break; 1259 column_values[cnt++] = B->a[j]; 1260 } 1261 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1262 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1263 } 1264 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1265 1266 /* store the column values to the file */ 1267 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1268 if (!rank) { 1269 MPI_Status status; 1270 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1271 for (i=1; i<size; i++) { 1272 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1273 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1274 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1275 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1276 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1277 } 1278 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1279 } else { 1280 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1281 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1282 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1283 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1284 } 1285 ierr = PetscFree(column_values);CHKERRQ(ierr); 1286 1287 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1288 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #include <petscdraw.h> 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1295 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1296 { 1297 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1298 PetscErrorCode ierr; 1299 PetscMPIInt rank = aij->rank,size = aij->size; 1300 PetscBool isdraw,iascii,isbinary; 1301 PetscViewer sviewer; 1302 PetscViewerFormat format; 1303 1304 PetscFunctionBegin; 1305 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1306 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1307 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1308 if (iascii) { 1309 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1310 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1311 MatInfo info; 1312 PetscBool inodes; 1313 1314 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1315 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1316 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1317 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1318 if (!inodes) { 1319 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1320 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1321 } else { 1322 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1323 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1324 } 1325 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1326 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1327 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1328 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1329 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1330 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1331 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1332 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1335 PetscInt inodecount,inodelimit,*inodes; 1336 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1337 if (inodes) { 1338 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1339 } else { 1340 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1341 } 1342 PetscFunctionReturn(0); 1343 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1344 PetscFunctionReturn(0); 1345 } 1346 } else if (isbinary) { 1347 if (size == 1) { 1348 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1349 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1350 } else { 1351 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1352 } 1353 PetscFunctionReturn(0); 1354 } else if (isdraw) { 1355 PetscDraw draw; 1356 PetscBool isnull; 1357 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1358 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1359 } 1360 1361 { 1362 /* assemble the entire matrix onto first processor. */ 1363 Mat A; 1364 Mat_SeqAIJ *Aloc; 1365 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1366 MatScalar *a; 1367 const char *matname; 1368 1369 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1370 if (!rank) { 1371 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1372 } else { 1373 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1374 } 1375 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1376 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1377 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1378 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1379 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1380 1381 /* copy over the A part */ 1382 Aloc = (Mat_SeqAIJ*)aij->A->data; 1383 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1384 row = mat->rmap->rstart; 1385 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1386 for (i=0; i<m; i++) { 1387 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1388 row++; 1389 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1390 } 1391 aj = Aloc->j; 1392 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1393 1394 /* copy over the B part */ 1395 Aloc = (Mat_SeqAIJ*)aij->B->data; 1396 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1397 row = mat->rmap->rstart; 1398 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1399 ct = cols; 1400 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1401 for (i=0; i<m; i++) { 1402 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1403 row++; 1404 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1405 } 1406 ierr = PetscFree(ct);CHKERRQ(ierr); 1407 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1408 ierr = 
MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1409 /* 1410 Everyone has to call to draw the matrix since the graphics waits are 1411 synchronized across all processors that share the PetscDraw object 1412 */ 1413 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1414 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1415 if (!rank) { 1416 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1417 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1418 } 1419 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1420 ierr = MatDestroy(&A);CHKERRQ(ierr); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatView_MPIAIJ" 1427 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1428 { 1429 PetscErrorCode ierr; 1430 PetscBool iascii,isdraw,issocket,isbinary; 1431 1432 PetscFunctionBegin; 1433 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1434 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1435 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1436 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1437 if (iascii || isdraw || isbinary || issocket) { 1438 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1439 } 1440 PetscFunctionReturn(0); 1441 } 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatSOR_MPIAIJ" 1445 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1446 { 1447 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1448 PetscErrorCode ierr; 1449 Vec bb1 = 0; 1450 PetscBool hasop; 1451 1452 PetscFunctionBegin; 1453 if (flag == SOR_APPLY_UPPER) { 1454 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } 1457 1458 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1459 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1460 } 1461 1462 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1463 if (flag & SOR_ZERO_INITIAL_GUESS) { 1464 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1465 its--; 1466 } 1467 1468 while (its--) { 1469 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1470 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1471 1472 /* update rhs: bb1 = bb - B*x */ 1473 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1474 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1475 1476 /* local sweep */ 1477 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1478 } 1479 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1480 if (flag & SOR_ZERO_INITIAL_GUESS) { 1481 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1482 its--; 1483 } 1484 while (its--) { 1485 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1486 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 1488 /* update rhs: bb1 = bb - B*x */ 1489 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1490 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1491 1492 /* local sweep */ 1493 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1494 } 1495 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1496 if (flag & SOR_ZERO_INITIAL_GUESS) { 1497 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1498 its--; 1499 } 1500 while (its--) { 1501 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1502 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 1504 /* update rhs: bb1 = bb - B*x */ 1505 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1506 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1507 1508 /* local sweep */ 1509 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1510 } 1511 } else if (flag & SOR_EISENSTAT) { 1512 Vec xx1; 1513 1514 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1515 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1516 1517 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1518 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 if (!mat->diag) { 1520 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1521 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1522 } 1523 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1524 if (hasop) { 1525 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1526 } else { 1527 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1528 } 1529 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1530 1531 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1532 1533 /* local sweep */ 1534 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1535 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1536 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1537 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1538 1539 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatPermute_MPIAIJ" 1545 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1546 { 1547 Mat aA,aB,Aperm; 1548 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1549 PetscScalar *aa,*ba; 1550 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1551 PetscSF rowsf,sf; 1552 IS parcolp = NULL; 1553 PetscBool done; 1554 PetscErrorCode ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1558 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1559 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1560 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1561 1562 /* Invert row permutation to find out where my rows should go */ 1563 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1564 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1565 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1566 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1567 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1568 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1569 1570 /* Invert column permutation to find out where my 
columns should go */ 1571 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1572 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1573 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1574 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1575 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1576 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1577 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1578 1579 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1580 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1581 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1582 1583 /* Find out where my gcols should go */ 1584 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1585 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1586 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1587 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1588 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1589 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1590 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1591 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1592 1593 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1594 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1595 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1596 for (i=0; i<m; i++) { 1597 PetscInt row = rdest[i],rowner; 1598 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1599 for (j=ai[i]; j<ai[i+1]; j++) { 1600 PetscInt cowner,col = cdest[aj[j]]; 1601 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1602 if (rowner == cowner) dnnz[i]++; 1603 else onnz[i]++; 1604 } 1605 for (j=bi[i]; j<bi[i+1]; j++) { 1606 PetscInt cowner,col = gcdest[bj[j]]; 1607 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1608 if (rowner == cowner) dnnz[i]++; 1609 else onnz[i]++; 1610 } 1611 } 1612 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1613 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1614 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1615 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1616 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1617 1618 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1619 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1620 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1621 for (i=0; i<m; i++) { 1622 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1623 PetscInt j0,rowlen; 1624 rowlen = ai[i+1] - ai[i]; 1625 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1626 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1627 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1628 } 1629 rowlen = bi[i+1] - bi[i]; 1630 for (j0=j=0; j<rowlen; j0=j) { 1631 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1632 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1633 } 1634 } 1635 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1636 ierr = 
MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1637 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1638 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1639 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1640 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1641 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1642 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1643 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1644 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1645 *B = Aperm; 1646 PetscFunctionReturn(0); 1647 } 1648 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1651 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1652 { 1653 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1654 Mat A = mat->A,B = mat->B; 1655 PetscErrorCode ierr; 1656 PetscReal isend[5],irecv[5]; 1657 1658 PetscFunctionBegin; 1659 info->block_size = 1.0; 1660 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1661 1662 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1663 isend[3] = info->memory; isend[4] = info->mallocs; 1664 1665 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1666 1667 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1668 isend[3] += info->memory; isend[4] += info->mallocs; 1669 if (flag == MAT_LOCAL) { 1670 info->nz_used = isend[0]; 1671 info->nz_allocated = isend[1]; 1672 info->nz_unneeded = isend[2]; 1673 info->memory = isend[3]; 1674 info->mallocs = isend[4]; 1675 } else if (flag == MAT_GLOBAL_MAX) { 1676 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1677 1678 info->nz_used = irecv[0]; 1679 info->nz_allocated = irecv[1]; 1680 info->nz_unneeded = irecv[2]; 1681 info->memory = irecv[3]; 1682 info->mallocs = irecv[4]; 1683 } else if (flag == MAT_GLOBAL_SUM) { 1684 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1685 1686 info->nz_used = irecv[0]; 1687 info->nz_allocated = irecv[1]; 1688 info->nz_unneeded = irecv[2]; 1689 info->memory = irecv[3]; 1690 info->mallocs = irecv[4]; 1691 } 1692 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1693 info->fill_ratio_needed = 0; 1694 info->factor_mallocs = 0; 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatSetOption_MPIAIJ" 1700 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1701 { 1702 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1703 PetscErrorCode ierr; 1704 1705 PetscFunctionBegin; 1706 switch (op) { 1707 case MAT_NEW_NONZERO_LOCATIONS: 1708 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1709 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1710 case MAT_KEEP_NONZERO_PATTERN: 1711 case MAT_NEW_NONZERO_LOCATION_ERR: 1712 case MAT_USE_INODES: 1713 case MAT_IGNORE_ZERO_ENTRIES: 1714 MatCheckPreallocated(A,1); 1715 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1716 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1717 break; 1718 case MAT_ROW_ORIENTED: 1719 a->roworiented = flg; 1720 1721 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1722 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1723 break; 1724 case MAT_NEW_DIAGONALS: 1725 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1726 break; 1727 case MAT_IGNORE_OFF_PROC_ENTRIES: 1728 a->donotstash = flg; 1729 break; 1730 case MAT_SPD: 1731 A->spd_set = PETSC_TRUE; 1732 A->spd = flg; 1733 if 
(flg) { 1734 A->symmetric = PETSC_TRUE; 1735 A->structurally_symmetric = PETSC_TRUE; 1736 A->symmetric_set = PETSC_TRUE; 1737 A->structurally_symmetric_set = PETSC_TRUE; 1738 } 1739 break; 1740 case MAT_SYMMETRIC: 1741 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1742 break; 1743 case MAT_STRUCTURALLY_SYMMETRIC: 1744 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1745 break; 1746 case MAT_HERMITIAN: 1747 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1748 break; 1749 case MAT_SYMMETRY_ETERNAL: 1750 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1751 break; 1752 default: 1753 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1754 } 1755 PetscFunctionReturn(0); 1756 } 1757 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatGetRow_MPIAIJ" 1760 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1761 { 1762 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1763 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1764 PetscErrorCode ierr; 1765 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1766 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1767 PetscInt *cmap,*idx_p; 1768 1769 PetscFunctionBegin; 1770 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1771 mat->getrowactive = PETSC_TRUE; 1772 1773 if (!mat->rowvalues && (idx || v)) { 1774 /* 1775 allocate enough space to hold information from the longest row. 1776 */ 1777 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1778 PetscInt max = 1,tmp; 1779 for (i=0; i<matin->rmap->n; i++) { 1780 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1781 if (max < tmp) max = tmp; 1782 } 1783 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1784 } 1785 1786 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1787 lrow = row - rstart; 1788 1789 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1790 if (!v) {pvA = 0; pvB = 0;} 1791 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1792 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1793 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1794 nztot = nzA + nzB; 1795 1796 cmap = mat->garray; 1797 if (v || idx) { 1798 if (nztot) { 1799 /* Sort by increasing column numbers, assuming A and B already sorted */ 1800 PetscInt imark = -1; 1801 if (v) { 1802 *v = v_p = mat->rowvalues; 1803 for (i=0; i<nzB; i++) { 1804 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1805 else break; 1806 } 1807 imark = i; 1808 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1809 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1810 } 1811 if (idx) { 1812 *idx = idx_p = mat->rowindices; 1813 if (imark > -1) { 1814 for (i=0; i<imark; i++) { 1815 idx_p[i] = cmap[cworkB[i]]; 1816 } 1817 } else { 1818 for (i=0; i<nzB; i++) { 1819 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1820 else break; 1821 } 1822 imark = i; 1823 } 1824 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1825 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1826 } 1827 } else { 1828 if (idx) *idx = 0; 1829 if (v) *v = 0; 1830 } 1831 } 1832 *nz = nztot; 1833 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1834 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1840 PetscErrorCode 
MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1841 { 1842 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1843 1844 PetscFunctionBegin; 1845 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1846 aij->getrowactive = PETSC_FALSE; 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatNorm_MPIAIJ" 1852 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1853 { 1854 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1855 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1856 PetscErrorCode ierr; 1857 PetscInt i,j,cstart = mat->cmap->rstart; 1858 PetscReal sum = 0.0; 1859 MatScalar *v; 1860 1861 PetscFunctionBegin; 1862 if (aij->size == 1) { 1863 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1864 } else { 1865 if (type == NORM_FROBENIUS) { 1866 v = amat->a; 1867 for (i=0; i<amat->nz; i++) { 1868 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1869 } 1870 v = bmat->a; 1871 for (i=0; i<bmat->nz; i++) { 1872 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1873 } 1874 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1875 *norm = PetscSqrtReal(*norm); 1876 } else if (type == NORM_1) { /* max column norm */ 1877 PetscReal *tmp,*tmp2; 1878 PetscInt *jj,*garray = aij->garray; 1879 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1880 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1881 *norm = 0.0; 1882 v = amat->a; jj = amat->j; 1883 for (j=0; j<amat->nz; j++) { 1884 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1885 } 1886 v = bmat->a; jj = bmat->j; 1887 for (j=0; j<bmat->nz; j++) { 1888 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1889 } 1890 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1891 for (j=0; j<mat->cmap->N; j++) { 1892 if (tmp2[j] > *norm) *norm = tmp2[j]; 1893 } 1894 ierr = PetscFree(tmp);CHKERRQ(ierr); 1895 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1896 } else if (type == NORM_INFINITY) { /* max row norm */ 1897 PetscReal ntemp = 0.0; 1898 for (j=0; j<aij->A->rmap->n; j++) { 1899 v = amat->a + amat->i[j]; 1900 sum = 0.0; 1901 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1902 sum += PetscAbsScalar(*v); v++; 1903 } 1904 v = bmat->a + bmat->i[j]; 1905 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1906 sum += PetscAbsScalar(*v); v++; 1907 } 1908 if (sum > ntemp) ntemp = sum; 1909 } 1910 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1911 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1912 } 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatTranspose_MPIAIJ" 1918 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1919 { 1920 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1921 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1922 PetscErrorCode ierr; 1923 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1924 PetscInt cstart = A->cmap->rstart,ncol; 1925 Mat B; 1926 MatScalar *array; 1927 1928 PetscFunctionBegin; 1929 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1930 1931 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1932 ai = Aloc->i; aj = Aloc->j; 1933 bi 
= Bloc->i; bj = Bloc->j; 1934 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1935 PetscInt *d_nnz,*g_nnz,*o_nnz; 1936 PetscSFNode *oloc; 1937 PETSC_UNUSED PetscSF sf; 1938 1939 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1940 /* compute d_nnz for preallocation */ 1941 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1942 for (i=0; i<ai[ma]; i++) { 1943 d_nnz[aj[i]]++; 1944 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1945 } 1946 /* compute local off-diagonal contributions */ 1947 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1948 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1949 /* map those to global */ 1950 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1951 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1952 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1953 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1954 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1955 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1956 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1957 1958 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1959 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1960 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1961 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1962 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1963 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1964 } else { 1965 B = *matout; 1966 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1967 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1968 } 1969 1970 /* copy over the A part */ 1971 array = Aloc->a; 1972 row = A->rmap->rstart; 1973 for (i=0; i<ma; i++) { 1974 ncol = ai[i+1]-ai[i]; 1975 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1976 row++; 1977 array += ncol; aj += ncol; 1978 } 1979 aj = Aloc->j; 1980 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1981 1982 /* copy over the B part */ 1983 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1984 array = Bloc->a; 1985 row = A->rmap->rstart; 1986 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1987 cols_tmp = cols; 1988 for (i=0; i<mb; i++) { 1989 ncol = bi[i+1]-bi[i]; 1990 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1991 row++; 1992 array += ncol; cols_tmp += ncol; 1993 } 1994 ierr = PetscFree(cols);CHKERRQ(ierr); 1995 1996 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1997 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1998 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1999 *matout = B; 2000 } else { 2001 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2002 } 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2008 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2009 { 2010 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2011 Mat a = aij->A,b = aij->B; 2012 PetscErrorCode ierr; 2013 PetscInt s1,s2,s3; 2014 2015 PetscFunctionBegin; 2016 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2017 if (rr) { 2018 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2019 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2020 /* Overlap 
communication with computation. */ 2021 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2022 } 2023 if (ll) { 2024 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2025 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2026 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2027 } 2028 /* scale the diagonal block */ 2029 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2030 2031 if (rr) { 2032 /* Do a scatter end and then right scale the off-diagonal block */ 2033 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2034 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2035 } 2036 PetscFunctionReturn(0); 2037 } 2038 2039 #undef __FUNCT__ 2040 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2041 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2042 { 2043 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2044 PetscErrorCode ierr; 2045 2046 PetscFunctionBegin; 2047 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2048 PetscFunctionReturn(0); 2049 } 2050 2051 #undef __FUNCT__ 2052 #define __FUNCT__ "MatEqual_MPIAIJ" 2053 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2054 { 2055 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2056 Mat a,b,c,d; 2057 PetscBool flg; 2058 PetscErrorCode ierr; 2059 2060 PetscFunctionBegin; 2061 a = matA->A; b = matA->B; 2062 c = matB->A; d = matB->B; 2063 2064 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2065 if (flg) { 2066 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2067 } 2068 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatCopy_MPIAIJ" 2074 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2075 { 2076 PetscErrorCode ierr; 2077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2078 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2079 2080 PetscFunctionBegin; 2081 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2082 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2083 /* because of the column compression in the off-processor part of the matrix a->B, 2084 the number of columns in a->B and b->B may be different, hence we cannot call 2085 the MatCopy() directly on the two parts. If need be, we can provide a more 2086 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2087 then copying the submatrices */ 2088 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2089 } else { 2090 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2091 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2092 } 2093 PetscFunctionReturn(0); 2094 } 2095 2096 #undef __FUNCT__ 2097 #define __FUNCT__ "MatSetUp_MPIAIJ" 2098 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2099 { 2100 PetscErrorCode ierr; 2101 2102 PetscFunctionBegin; 2103 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2104 PetscFunctionReturn(0); 2105 } 2106 2107 /* 2108 Computes the number of nonzeros per row needed for preallocation when X and Y 2109 have different nonzero structure. 
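   Both row patterns are assumed to be sorted by mapped global column index
   (xltog[xjj[]] and yltog[yjj[]] increasing), and the loop counts the size of
   the union of the two patterns. A small worked example with hypothetical
   indices: if row i of X maps to global columns {0,3,7} and row i of Y maps
   to {3,5}, the union is {0,3,5,7}, so nnz[i] = 4; the shared column 3 is
   counted only once.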
2110 */ 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2113 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2114 { 2115 PetscInt i,j,k,nzx,nzy; 2116 2117 PetscFunctionBegin; 2118 /* Set the number of nonzeros in the new matrix */ 2119 for (i=0; i<m; i++) { 2120 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2121 nzx = xi[i+1] - xi[i]; 2122 nzy = yi[i+1] - yi[i]; 2123 nnz[i] = 0; 2124 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2125 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2126 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2127 nnz[i]++; 2128 } 2129 for (; k<nzy; k++) nnz[i]++; 2130 } 2131 PetscFunctionReturn(0); 2132 } 2133 2134 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2137 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2138 { 2139 PetscErrorCode ierr; 2140 PetscInt m = Y->rmap->N; 2141 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2142 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2143 2144 PetscFunctionBegin; 2145 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2146 PetscFunctionReturn(0); 2147 } 2148 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "MatAXPY_MPIAIJ" 2151 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2152 { 2153 PetscErrorCode ierr; 2154 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2155 PetscBLASInt bnz,one=1; 2156 Mat_SeqAIJ *x,*y; 2157 2158 PetscFunctionBegin; 2159 if (str == SAME_NONZERO_PATTERN) { 2160 PetscScalar alpha = a; 2161 x = (Mat_SeqAIJ*)xx->A->data; 2162 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2163 y = (Mat_SeqAIJ*)yy->A->data; 2164 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2165 x = (Mat_SeqAIJ*)xx->B->data; 2166 y = (Mat_SeqAIJ*)yy->B->data; 2167 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2168 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2169 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2170 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2171 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2172 } else { 2173 Mat B; 2174 PetscInt *nnz_d,*nnz_o; 2175 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2176 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2177 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2178 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2179 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2180 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2181 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2182 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2183 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2184 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2185 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2186 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2187 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2188 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2189 } 2190 
PetscFunctionReturn(0); 2191 } 2192 2193 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatConjugate_MPIAIJ" 2197 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2198 { 2199 #if defined(PETSC_USE_COMPLEX) 2200 PetscErrorCode ierr; 2201 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2202 2203 PetscFunctionBegin; 2204 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2205 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2206 #else 2207 PetscFunctionBegin; 2208 #endif 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatRealPart_MPIAIJ" 2214 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2215 { 2216 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2217 PetscErrorCode ierr; 2218 2219 PetscFunctionBegin; 2220 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2221 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 #undef __FUNCT__ 2226 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2227 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2228 { 2229 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2230 PetscErrorCode ierr; 2231 2232 PetscFunctionBegin; 2233 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2234 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #if defined(PETSC_HAVE_PBGL) 2239 2240 #include <boost/parallel/mpi/bsp_process_group.hpp> 2241 #include <boost/graph/distributed/ilu_default_graph.hpp> 2242 #include <boost/graph/distributed/ilu_0_block.hpp> 2243 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2244 #include <boost/graph/distributed/petsc/interface.hpp> 2245 #include <boost/multi_array.hpp> 2246 #include <boost/parallel/distributed_property_map->hpp> 2247 2248 #undef __FUNCT__ 2249 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2250 /* 2251 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2252 */ 2253 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2254 { 2255 namespace petsc = boost::distributed::petsc; 2256 2257 namespace graph_dist = boost::graph::distributed; 2258 using boost::graph::distributed::ilu_default::process_group_type; 2259 using boost::graph::ilu_permuted; 2260 2261 PetscBool row_identity, col_identity; 2262 PetscContainer c; 2263 PetscInt m, n, M, N; 2264 PetscErrorCode ierr; 2265 2266 PetscFunctionBegin; 2267 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2268 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2269 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2270 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2271 2272 process_group_type pg; 2273 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2274 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2275 lgraph_type& level_graph = *lgraph_p; 2276 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2277 2278 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2279 ilu_permuted(level_graph); 2280 2281 /* put together the new matrix */ 2282 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2283 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2284 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2285 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2286 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2287 
ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2288 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2289 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2290 2291 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2292 ierr = PetscContainerSetPointer(c, lgraph_p); 2293 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2294 ierr = PetscContainerDestroy(&c); 2295 PetscFunctionReturn(0); 2296 } 2297 2298 #undef __FUNCT__ 2299 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2300 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2301 { 2302 PetscFunctionBegin; 2303 PetscFunctionReturn(0); 2304 } 2305 2306 #undef __FUNCT__ 2307 #define __FUNCT__ "MatSolve_MPIAIJ" 2308 /* 2309 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2310 */ 2311 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2312 { 2313 namespace graph_dist = boost::graph::distributed; 2314 2315 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2316 lgraph_type *lgraph_p; 2317 PetscContainer c; 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2322 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2323 ierr = VecCopy(b, x);CHKERRQ(ierr); 2324 2325 PetscScalar *array_x; 2326 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2327 PetscInt sx; 2328 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2329 2330 PetscScalar *array_b; 2331 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2332 PetscInt sb; 2333 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2334 2335 lgraph_type& level_graph = *lgraph_p; 2336 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2337 2338 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2339 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2340 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2341 2342 typedef boost::iterator_property_map<array_ref_type::iterator, 2343 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2344 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2345 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2346 2347 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2348 PetscFunctionReturn(0); 2349 } 2350 #endif 2351 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2354 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2355 { 2356 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2357 PetscErrorCode ierr; 2358 PetscInt i,*idxb = 0; 2359 PetscScalar *va,*vb; 2360 Vec vtmp; 2361 2362 PetscFunctionBegin; 2363 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2364 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2365 if (idx) { 2366 for (i=0; i<A->rmap->n; i++) { 2367 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2368 } 2369 } 2370 2371 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2372 if (idx) { 2373 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2374 } 2375 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2376 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2377 2378 for (i=0; i<A->rmap->n; i++) { 2379 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2380 va[i] = vb[i]; 2381 if (idx) idx[i] = a->garray[idxb[i]]; 2382 } 2383 } 2384 2385 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2386 ierr = 
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2387 ierr = PetscFree(idxb);CHKERRQ(ierr); 2388 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391 2392 #undef __FUNCT__ 2393 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2394 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2395 { 2396 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2397 PetscErrorCode ierr; 2398 PetscInt i,*idxb = 0; 2399 PetscScalar *va,*vb; 2400 Vec vtmp; 2401 2402 PetscFunctionBegin; 2403 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2404 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2405 if (idx) { 2406 for (i=0; i<A->rmap->n; i++) { 2407 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2408 } 2409 } 2410 2411 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2412 if (idx) { 2413 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2414 } 2415 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2416 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2417 2418 for (i=0; i<A->rmap->n; i++) { 2419 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2420 va[i] = vb[i]; 2421 if (idx) idx[i] = a->garray[idxb[i]]; 2422 } 2423 } 2424 2425 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2426 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2427 ierr = PetscFree(idxb);CHKERRQ(ierr); 2428 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2429 PetscFunctionReturn(0); 2430 } 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2434 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2435 { 2436 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2437 PetscInt n = A->rmap->n; 2438 PetscInt cstart = A->cmap->rstart; 2439 PetscInt *cmap = mat->garray; 2440 PetscInt *diagIdx, *offdiagIdx; 2441 Vec diagV, offdiagV; 2442 PetscScalar *a, *diagA, *offdiagA; 2443 PetscInt r; 2444 PetscErrorCode ierr; 2445 2446 PetscFunctionBegin; 2447 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2448 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2449 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2450 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2451 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2452 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2453 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2454 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2455 for (r = 0; r < n; ++r) { 2456 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2457 a[r] = diagA[r]; 2458 idx[r] = cstart + diagIdx[r]; 2459 } else { 2460 a[r] = offdiagA[r]; 2461 idx[r] = cmap[offdiagIdx[r]]; 2462 } 2463 } 2464 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2465 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2466 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2467 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2468 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2469 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2475 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2476 { 2477 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2478 PetscInt n = A->rmap->n; 2479 PetscInt cstart = A->cmap->rstart; 2480 PetscInt *cmap = mat->garray; 2481 PetscInt *diagIdx, *offdiagIdx; 2482 Vec diagV, offdiagV; 2483 PetscScalar *a, *diagA, *offdiagA; 2484 PetscInt r; 2485 PetscErrorCode ierr; 2486 2487 PetscFunctionBegin; 2488 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2489 ierr
= VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2490 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2491 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2492 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2493 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2494 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2495 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2496 for (r = 0; r < n; ++r) { 2497 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2498 a[r] = diagA[r]; 2499 idx[r] = cstart + diagIdx[r]; 2500 } else { 2501 a[r] = offdiagA[r]; 2502 idx[r] = cmap[offdiagIdx[r]]; 2503 } 2504 } 2505 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2509 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2510 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2511 PetscFunctionReturn(0); 2512 } 2513 2514 #undef __FUNCT__ 2515 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2516 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2517 { 2518 PetscErrorCode ierr; 2519 Mat *dummy; 2520 2521 PetscFunctionBegin; 2522 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2523 *newmat = *dummy; 2524 ierr = PetscFree(dummy);CHKERRQ(ierr); 2525 PetscFunctionReturn(0); 2526 } 2527 2528 #undef __FUNCT__ 2529 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2530 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2531 { 2532 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2533 PetscErrorCode ierr; 2534 2535 PetscFunctionBegin; 2536 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2542 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2543 { 2544 PetscErrorCode ierr; 2545 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2546 2547 PetscFunctionBegin; 2548 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2549 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2550 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2551 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2552 PetscFunctionReturn(0); 2553 } 2554 2555 /* -------------------------------------------------------------------*/ 2556 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2557 MatGetRow_MPIAIJ, 2558 MatRestoreRow_MPIAIJ, 2559 MatMult_MPIAIJ, 2560 /* 4*/ MatMultAdd_MPIAIJ, 2561 MatMultTranspose_MPIAIJ, 2562 MatMultTransposeAdd_MPIAIJ, 2563 #if defined(PETSC_HAVE_PBGL) 2564 MatSolve_MPIAIJ, 2565 #else 2566 0, 2567 #endif 2568 0, 2569 0, 2570 /*10*/ 0, 2571 0, 2572 0, 2573 MatSOR_MPIAIJ, 2574 MatTranspose_MPIAIJ, 2575 /*15*/ MatGetInfo_MPIAIJ, 2576 MatEqual_MPIAIJ, 2577 MatGetDiagonal_MPIAIJ, 2578 MatDiagonalScale_MPIAIJ, 2579 MatNorm_MPIAIJ, 2580 /*20*/ MatAssemblyBegin_MPIAIJ, 2581 MatAssemblyEnd_MPIAIJ, 2582 MatSetOption_MPIAIJ, 2583 MatZeroEntries_MPIAIJ, 2584 /*24*/ MatZeroRows_MPIAIJ, 2585 0, 2586 #if defined(PETSC_HAVE_PBGL) 2587 0, 2588 #else 2589 0, 2590 #endif 2591 0, 2592 0, 2593 /*29*/ MatSetUp_MPIAIJ, 2594 #if defined(PETSC_HAVE_PBGL) 2595 0, 2596 #else 2597 0, 2598 #endif 2599 0, 2600 0, 2601 0, 2602 /*34*/ MatDuplicate_MPIAIJ, 2603 0, 2604 0, 2605 0, 2606 0, 2607 /*39*/ MatAXPY_MPIAIJ, 2608 MatGetSubMatrices_MPIAIJ, 2609 MatIncreaseOverlap_MPIAIJ, 2610 MatGetValues_MPIAIJ, 
2611 MatCopy_MPIAIJ, 2612 /*44*/ MatGetRowMax_MPIAIJ, 2613 MatScale_MPIAIJ, 2614 0, 2615 MatDiagonalSet_MPIAIJ, 2616 MatZeroRowsColumns_MPIAIJ, 2617 /*49*/ MatSetRandom_MPIAIJ, 2618 0, 2619 0, 2620 0, 2621 0, 2622 /*54*/ MatFDColoringCreate_MPIXAIJ, 2623 0, 2624 MatSetUnfactored_MPIAIJ, 2625 MatPermute_MPIAIJ, 2626 0, 2627 /*59*/ MatGetSubMatrix_MPIAIJ, 2628 MatDestroy_MPIAIJ, 2629 MatView_MPIAIJ, 2630 0, 2631 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2632 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2633 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2634 0, 2635 0, 2636 0, 2637 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2638 MatGetRowMinAbs_MPIAIJ, 2639 0, 2640 MatSetColoring_MPIAIJ, 2641 0, 2642 MatSetValuesAdifor_MPIAIJ, 2643 /*75*/ MatFDColoringApply_AIJ, 2644 0, 2645 0, 2646 0, 2647 MatFindZeroDiagonals_MPIAIJ, 2648 /*80*/ 0, 2649 0, 2650 0, 2651 /*83*/ MatLoad_MPIAIJ, 2652 0, 2653 0, 2654 0, 2655 0, 2656 0, 2657 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2658 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2659 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2660 MatPtAP_MPIAIJ_MPIAIJ, 2661 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2662 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2663 0, 2664 0, 2665 0, 2666 0, 2667 /*99*/ 0, 2668 0, 2669 0, 2670 MatConjugate_MPIAIJ, 2671 0, 2672 /*104*/MatSetValuesRow_MPIAIJ, 2673 MatRealPart_MPIAIJ, 2674 MatImaginaryPart_MPIAIJ, 2675 0, 2676 0, 2677 /*109*/0, 2678 0, 2679 MatGetRowMin_MPIAIJ, 2680 0, 2681 0, 2682 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /*119*/0, 2688 0, 2689 0, 2690 0, 2691 MatGetMultiProcBlock_MPIAIJ, 2692 /*124*/MatFindNonzeroRows_MPIAIJ, 2693 MatGetColumnNorms_MPIAIJ, 2694 MatInvertBlockDiagonal_MPIAIJ, 2695 0, 2696 MatGetSubMatricesParallel_MPIAIJ, 2697 /*129*/0, 2698 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2699 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2700 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2701 0, 2702 /*134*/0, 2703 0, 2704 0, 2705 0, 2706 0, 2707 /*139*/0, 2708 0, 2709 0, 2710 MatFDColoringSetUp_MPIXAIJ, 2711 0, 2712 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2713 }; 2714 2715 /* ----------------------------------------------------------------------------------------*/ 2716 2717 #undef __FUNCT__ 2718 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2719 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2720 { 2721 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2722 PetscErrorCode ierr; 2723 2724 PetscFunctionBegin; 2725 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2726 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2727 PetscFunctionReturn(0); 2728 } 2729 2730 #undef __FUNCT__ 2731 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2732 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2733 { 2734 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2735 PetscErrorCode ierr; 2736 2737 PetscFunctionBegin; 2738 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2739 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 #undef __FUNCT__ 2744 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2745 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2746 { 2747 Mat_MPIAIJ *b; 2748 PetscErrorCode ierr; 2749 2750 PetscFunctionBegin; 2751 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2752 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2753 b = (Mat_MPIAIJ*)B->data; 2754 2755 if (!B->preallocated) { 2756 /* Explicitly create 2 MATSEQAIJ matrices. 
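      b->A holds the "diagonal" block (the rows and columns owned by this
      process) and b->B holds the off-diagonal block. Note that b->B is created
      below with the full global column count; its columns are later compressed
      (see the column-compression remark in MatCopy_MPIAIJ() above) so that only
      the columns actually referenced, recorded in garray, are stored.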
*/ 2757 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2758 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2759 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2760 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2761 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2762 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2763 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2764 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2765 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2766 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2767 } 2768 2769 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2770 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2771 B->preallocated = PETSC_TRUE; 2772 PetscFunctionReturn(0); 2773 } 2774 2775 #undef __FUNCT__ 2776 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2777 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2778 { 2779 Mat mat; 2780 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2781 PetscErrorCode ierr; 2782 2783 PetscFunctionBegin; 2784 *newmat = 0; 2785 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2786 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2787 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2788 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2789 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2790 a = (Mat_MPIAIJ*)mat->data; 2791 2792 mat->factortype = matin->factortype; 2793 mat->assembled = PETSC_TRUE; 2794 mat->insertmode = NOT_SET_VALUES; 2795 mat->preallocated = PETSC_TRUE; 2796 2797 a->size = oldmat->size; 2798 a->rank = oldmat->rank; 2799 a->donotstash = oldmat->donotstash; 2800 a->roworiented = oldmat->roworiented; 2801 a->rowindices = 0; 2802 a->rowvalues = 0; 2803 a->getrowactive = PETSC_FALSE; 2804 2805 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2806 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2807 2808 if (oldmat->colmap) { 2809 #if defined(PETSC_USE_CTABLE) 2810 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2811 #else 2812 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2813 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2814 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2815 #endif 2816 } else a->colmap = 0; 2817 if (oldmat->garray) { 2818 PetscInt len; 2819 len = oldmat->B->cmap->n; 2820 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2821 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2822 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2823 } else a->garray = 0; 2824 2825 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2826 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2827 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2828 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2829 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2830 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2831 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2832 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2833 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2834 *newmat = mat; 2835 PetscFunctionReturn(0); 2836 } 2837 2838 2839 2840 #undef __FUNCT__ 2841 #define __FUNCT__ "MatLoad_MPIAIJ" 2842 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2843 { 2844 PetscScalar *vals,*svals; 2845 MPI_Comm comm; 2846 PetscErrorCode ierr; 2847 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2848 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2849 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2850 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2851 PetscInt cend,cstart,n,*rowners,sizesset=1; 2852 int fd; 2853 PetscInt bs = newMat->rmap->bs; 2854 2855 PetscFunctionBegin; 2856 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2857 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2858 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2859 if (!rank) { 2860 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2861 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2862 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2863 } 2864 2865 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 2866 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2867 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2868 if (bs < 0) bs = 1; 2869 2870 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2871 2872 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2873 M = header[1]; N = header[2]; 2874 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2875 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2876 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2877 2878 /* If global sizes are set, check if they are consistent with that given in the file */ 2879 if (sizesset) { 2880 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2881 } 2882 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2883 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2884 2885 /* determine ownership of all (block) rows */ 2886 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2887 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2888 else m = newMat->rmap->n; /* Set by user */ 2889 2890 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2891 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2892 2893 /* First process needs enough room for process with most rows */ 2894 if (!rank) { 2895 mmax = rowners[1]; 2896 for (i=2; i<=size; i++) { 2897 mmax = PetscMax(mmax, rowners[i]); 2898 } 2899 } else mmax = -1; /* unused, but compilers complain */ 2900 2901 rowners[0] = 0; 2902 for (i=2; i<=size; i++) { 2903 rowners[i] += rowners[i-1]; 2904 } 2905 rstart = rowners[rank]; 2906 rend = rowners[rank+1]; 2907 2908 /* distribute row lengths to all processors */ 
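  /* Process 0 reads each process's block of row lengths from the file and forwards it with MPIULong_Send(), while every other process posts the matching MPIULong_Recv(); procsnz[] accumulates the number of nonzeros owned by each process so that process 0 can size its staging buffers below. For example, with three processes owning 2, 2, and 1 rows (hypothetical counts), rowners[] = {0,2,4,5}: process 0 keeps the lengths of rows 0-1 in ourlens, then reads and ships the lengths of rows 2-3 to process 1 and of row 4 to process 2. */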
2909 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2910 if (!rank) { 2911 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2912 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2913 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2914 for (j=0; j<m; j++) { 2915 procsnz[0] += ourlens[j]; 2916 } 2917 for (i=1; i<size; i++) { 2918 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2919 /* calculate the number of nonzeros on each processor */ 2920 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2921 procsnz[i] += rowlengths[j]; 2922 } 2923 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2924 } 2925 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2926 } else { 2927 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2928 } 2929 2930 if (!rank) { 2931 /* determine max buffer needed and allocate it */ 2932 maxnz = 0; 2933 for (i=0; i<size; i++) { 2934 maxnz = PetscMax(maxnz,procsnz[i]); 2935 } 2936 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2937 2938 /* read in my part of the matrix column indices */ 2939 nz = procsnz[0]; 2940 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2941 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2942 2943 /* read in every one elses and ship off */ 2944 for (i=1; i<size; i++) { 2945 nz = procsnz[i]; 2946 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2947 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2948 } 2949 ierr = PetscFree(cols);CHKERRQ(ierr); 2950 } else { 2951 /* determine buffer space needed for message */ 2952 nz = 0; 2953 for (i=0; i<m; i++) { 2954 nz += ourlens[i]; 2955 } 2956 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2957 2958 /* receive message of column indices*/ 2959 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2960 } 2961 2962 /* determine column ownership if matrix is not square */ 2963 if (N != M) { 2964 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2965 else n = newMat->cmap->n; 2966 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2967 cstart = cend - n; 2968 } else { 2969 cstart = rstart; 2970 cend = rend; 2971 n = cend - cstart; 2972 } 2973 2974 /* loop over local rows, determining number of off diagonal entries */ 2975 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2976 jj = 0; 2977 for (i=0; i<m; i++) { 2978 for (j=0; j<ourlens[i]; j++) { 2979 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2980 jj++; 2981 } 2982 } 2983 2984 for (i=0; i<m; i++) { 2985 ourlens[i] -= offlens[i]; 2986 } 2987 if (!sizesset) { 2988 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2989 } 2990 2991 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2992 2993 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2994 2995 for (i=0; i<m; i++) { 2996 ourlens[i] += offlens[i]; 2997 } 2998 2999 if (!rank) { 3000 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3001 3002 /* read in my part of the matrix numerical values */ 3003 nz = procsnz[0]; 3004 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3005 3006 /* insert into matrix */ 3007 jj = rstart; 3008 smycols = mycols; 3009 svals = vals; 3010 for (i=0; i<m; i++) { 3011 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3012 smycols += ourlens[i]; 3013 svals += ourlens[i]; 3014 jj++; 3015 } 3016 3017 /* read in other processors and ship out */ 3018 for (i=1; i<size; i++) { 3019 
nz = procsnz[i]; 3020 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3021 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3022 } 3023 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3024 } else { 3025 /* receive numeric values */ 3026 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3027 3028 /* receive message of values*/ 3029 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3030 3031 /* insert into matrix */ 3032 jj = rstart; 3033 smycols = mycols; 3034 svals = vals; 3035 for (i=0; i<m; i++) { 3036 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3037 smycols += ourlens[i]; 3038 svals += ourlens[i]; 3039 jj++; 3040 } 3041 } 3042 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3043 ierr = PetscFree(vals);CHKERRQ(ierr); 3044 ierr = PetscFree(mycols);CHKERRQ(ierr); 3045 ierr = PetscFree(rowners);CHKERRQ(ierr); 3046 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3047 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3048 PetscFunctionReturn(0); 3049 } 3050 3051 #undef __FUNCT__ 3052 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3053 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3054 { 3055 PetscErrorCode ierr; 3056 IS iscol_local; 3057 PetscInt csize; 3058 3059 PetscFunctionBegin; 3060 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3061 if (call == MAT_REUSE_MATRIX) { 3062 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3063 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3064 } else { 3065 PetscInt cbs; 3066 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3067 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3068 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3069 } 3070 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3071 if (call == MAT_INITIAL_MATRIX) { 3072 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3073 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3074 } 3075 PetscFunctionReturn(0); 3076 } 3077 3078 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3079 #undef __FUNCT__ 3080 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3081 /* 3082 Not great since it makes two copies of the submatrix, first an SeqAIJ 3083 in local and then by concatenating the local matrices the end result. 3084 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3085 3086 Note: This requires a sequential iscol with all indices. 
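          For MAT_REUSE_MATRIX the intermediate sequential matrix is stashed on the output matrix with
    PetscObjectCompose() under the key "SubMatrix" and looked up again with PetscObjectQuery(), so the
    matrix passed back in must have been created by an earlier MAT_INITIAL_MATRIX call to this routine.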
3087 */ 3088 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3089 { 3090 PetscErrorCode ierr; 3091 PetscMPIInt rank,size; 3092 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3093 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3094 PetscBool allcolumns, colflag; 3095 Mat M,Mreuse; 3096 MatScalar *vwork,*aa; 3097 MPI_Comm comm; 3098 Mat_SeqAIJ *aij; 3099 3100 PetscFunctionBegin; 3101 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3102 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3103 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3104 3105 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3106 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3107 if (colflag && ncol == mat->cmap->N) { 3108 allcolumns = PETSC_TRUE; 3109 } else { 3110 allcolumns = PETSC_FALSE; 3111 } 3112 if (call == MAT_REUSE_MATRIX) { 3113 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3114 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3115 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3116 } else { 3117 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3118 } 3119 3120 /* 3121 m - number of local rows 3122 n - number of columns (same on all processors) 3123 rstart - first row in new global matrix generated 3124 */ 3125 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3126 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3127 if (call == MAT_INITIAL_MATRIX) { 3128 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3129 ii = aij->i; 3130 jj = aij->j; 3131 3132 /* 3133 Determine the number of non-zeros in the diagonal and off-diagonal 3134 portions of the matrix in order to do correct preallocation 3135 */ 3136 3137 /* first get start and end of "diagonal" columns */ 3138 if (csize == PETSC_DECIDE) { 3139 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3140 if (mglobal == n) { /* square matrix */ 3141 nlocal = m; 3142 } else { 3143 nlocal = n/size + ((n % size) > rank); 3144 } 3145 } else { 3146 nlocal = csize; 3147 } 3148 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3149 rstart = rend - nlocal; 3150 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3151 3152 /* next, compute all the lengths */ 3153 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3154 olens = dlens + m; 3155 for (i=0; i<m; i++) { 3156 jend = ii[i+1] - ii[i]; 3157 olen = 0; 3158 dlen = 0; 3159 for (j=0; j<jend; j++) { 3160 if (*jj < rstart || *jj >= rend) olen++; 3161 else dlen++; 3162 jj++; 3163 } 3164 olens[i] = olen; 3165 dlens[i] = dlen; 3166 } 3167 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3168 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3169 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3170 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3171 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3172 ierr = PetscFree(dlens);CHKERRQ(ierr); 3173 } else { 3174 PetscInt ml,nl; 3175 3176 M = *newmat; 3177 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3178 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3179 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3180 /* 3181 The next two 
lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3267 } 3268 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3269 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3270 3271 if (!v) { 3272 ierr = PetscFree(values);CHKERRQ(ierr); 3273 } 3274 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3275 PetscFunctionReturn(0); 3276 } 3277 3278 #undef __FUNCT__ 3279 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3280 /*@ 3281 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3282 (the default parallel PETSc format). 3283 3284 Collective on MPI_Comm 3285 3286 Input Parameters: 3287 + B - the matrix 3288 . i - the indices into j for the start of each local row (starts with zero) 3289 . j - the column indices for each local row (starts with zero) 3290 - v - optional values in the matrix 3291 3292 Level: developer 3293 3294 Notes: 3295 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3296 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3297 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3298 3299 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3300 3301 The format which is used for the sparse matrix input, is equivalent to a 3302 row-major ordering.. i.e for the following matrix, the input data expected is 3303 as shown: 3304 3305 1 0 0 3306 2 0 3 P0 3307 ------- 3308 4 5 6 P1 3309 3310 Process0 [P0]: rows_owned=[0,1] 3311 i = {0,1,3} [size = nrow+1 = 2+1] 3312 j = {0,0,2} [size = nz = 6] 3313 v = {1,2,3} [size = nz = 6] 3314 3315 Process1 [P1]: rows_owned=[2] 3316 i = {0,3} [size = nrow+1 = 1+1] 3317 j = {0,1,2} [size = nz = 6] 3318 v = {4,5,6} [size = nz = 6] 3319 3320 .keywords: matrix, aij, compressed row, sparse, parallel 3321 3322 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3323 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3324 @*/ 3325 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3326 { 3327 PetscErrorCode ierr; 3328 3329 PetscFunctionBegin; 3330 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3331 PetscFunctionReturn(0); 3332 } 3333 3334 #undef __FUNCT__ 3335 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3336 /*@C 3337 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3338 (the default parallel PETSc format). For good matrix assembly performance 3339 the user should preallocate the matrix storage by setting the parameters 3340 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3341 performance can be increased by more than a factor of 50. 3342 3343 Collective on MPI_Comm 3344 3345 Input Parameters: 3346 + B - the matrix 3347 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3348 (same value is used for all local rows) 3349 . d_nnz - array containing the number of nonzeros in the various rows of the 3350 DIAGONAL portion of the local submatrix (possibly different for each row) 3351 or NULL, if d_nz is used to specify the nonzero structure. 3352 The size of this array is equal to the number of local rows, i.e 'm'. 
   For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
       rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
     The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.
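
     A minimal calling sketch (an illustration only; it assumes the local CSR arrays i, j, and a
     have already been filled with this process's rows, laid out as described below):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
     /* the arrays are copied, so i, j, and a may be freed or reused after this call */
.ve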
3509 3510 The format which is used for the sparse matrix input, is equivalent to a 3511 row-major ordering.. i.e for the following matrix, the input data expected is 3512 as shown: 3513 3514 1 0 0 3515 2 0 3 P0 3516 ------- 3517 4 5 6 P1 3518 3519 Process0 [P0]: rows_owned=[0,1] 3520 i = {0,1,3} [size = nrow+1 = 2+1] 3521 j = {0,0,2} [size = nz = 6] 3522 v = {1,2,3} [size = nz = 6] 3523 3524 Process1 [P1]: rows_owned=[2] 3525 i = {0,3} [size = nrow+1 = 1+1] 3526 j = {0,1,2} [size = nz = 6] 3527 v = {4,5,6} [size = nz = 6] 3528 3529 .keywords: matrix, aij, compressed row, sparse, parallel 3530 3531 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3532 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3533 @*/ 3534 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3535 { 3536 PetscErrorCode ierr; 3537 3538 PetscFunctionBegin; 3539 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3540 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3541 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3542 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3543 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3544 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3545 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3546 PetscFunctionReturn(0); 3547 } 3548 3549 #undef __FUNCT__ 3550 #define __FUNCT__ "MatCreateAIJ" 3551 /*@C 3552 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3553 (the default parallel PETSc format). For good matrix assembly performance 3554 the user should preallocate the matrix storage by setting the parameters 3555 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3556 performance can be increased by more than a factor of 50. 3557 3558 Collective on MPI_Comm 3559 3560 Input Parameters: 3561 + comm - MPI communicator 3562 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3563 This value should be the same as the local size used in creating the 3564 y vector for the matrix-vector product y = Ax. 3565 . n - This value should be the same as the local size used in creating the 3566 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3567 calculated if N is given) For square matrices n is almost always m. 3568 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3569 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3570 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3571 (same value is used for all local rows) 3572 . d_nnz - array containing the number of nonzeros in the various rows of the 3573 DIAGONAL portion of the local submatrix (possibly different for each row) 3574 or NULL, if d_nz is used to specify the nonzero structure. 3575 The size of this array is equal to the number of local rows, i.e 'm'. 3576 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3577 submatrix (same value is used for all local rows). 3578 - o_nnz - array containing the number of nonzeros in the various rows of the 3579 OFF-DIAGONAL portion of the local submatrix (possibly different for 3580 each row) or NULL, if o_nz is used to specify the nonzero 3581 structure. 
The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal submatrix on
   process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
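
   For the layout above, a sketch of the corresponding call on each process (illustrative only;
   d_nnz and o_nnz hold that process's values from the lists above, and error checking is elided):
.vb
     Mat A;
     MatCreateAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);   /* m = n = 3 on proc0 and proc1, m = n = 2 on proc2 */
     /* ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve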
3715 3716 Level: intermediate 3717 3718 .keywords: matrix, aij, compressed row, sparse, parallel 3719 3720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3721 MPIAIJ, MatCreateMPIAIJWithArrays() 3722 @*/ 3723 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3724 { 3725 PetscErrorCode ierr; 3726 PetscMPIInt size; 3727 3728 PetscFunctionBegin; 3729 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3730 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3731 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3732 if (size > 1) { 3733 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3734 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3735 } else { 3736 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3737 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3738 } 3739 PetscFunctionReturn(0); 3740 } 3741 3742 #undef __FUNCT__ 3743 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3744 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3745 { 3746 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3747 3748 PetscFunctionBegin; 3749 if (Ad) *Ad = a->A; 3750 if (Ao) *Ao = a->B; 3751 if (colmap) *colmap = a->garray; 3752 PetscFunctionReturn(0); 3753 } 3754 3755 #undef __FUNCT__ 3756 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3757 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3758 { 3759 PetscErrorCode ierr; 3760 PetscInt i; 3761 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3762 3763 PetscFunctionBegin; 3764 if (coloring->ctype == IS_COLORING_GLOBAL) { 3765 ISColoringValue *allcolors,*colors; 3766 ISColoring ocoloring; 3767 3768 /* set coloring for diagonal portion */ 3769 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3770 3771 /* set coloring for off-diagonal portion */ 3772 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3773 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3774 for (i=0; i<a->B->cmap->n; i++) { 3775 colors[i] = allcolors[a->garray[i]]; 3776 } 3777 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3778 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3779 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3780 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3781 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3782 ISColoringValue *colors; 3783 PetscInt *larray; 3784 ISColoring ocoloring; 3785 3786 /* set coloring for diagonal portion */ 3787 ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr); 3788 for (i=0; i<a->A->cmap->n; i++) { 3789 larray[i] = i + A->cmap->rstart; 3790 } 3791 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3792 ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr); 3793 for (i=0; i<a->A->cmap->n; i++) { 3794 colors[i] = coloring->colors[larray[i]]; 3795 } 3796 ierr = PetscFree(larray);CHKERRQ(ierr); 3797 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3798 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3799 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3800 3801 /* set coloring for off-diagonal portion */ 3802 ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr); 3803 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3804 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3805 for (i=0; i<a->B->cmap->n; i++) { 3806 colors[i] = coloring->colors[larray[i]]; 3807 } 3808 ierr = PetscFree(larray);CHKERRQ(ierr); 3809 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3810 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3811 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3812 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3813 PetscFunctionReturn(0); 3814 } 3815 3816 #undef __FUNCT__ 3817 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3818 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3819 { 3820 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3821 PetscErrorCode ierr; 3822 3823 PetscFunctionBegin; 3824 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3825 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3826 PetscFunctionReturn(0); 3827 } 3828 3829 #undef __FUNCT__ 3830 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3831 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3832 { 3833 PetscErrorCode ierr; 3834 PetscInt m,N,i,rstart,nnz,Ii; 3835 PetscInt *indx; 3836 PetscScalar *values; 3837 3838 PetscFunctionBegin; 3839 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3840 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3841 PetscInt *dnz,*onz,sum,bs,cbs; 3842 3843 if (n == PETSC_DECIDE) { 3844 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3845 } 3846 /* Check sum(n) = N */ 3847 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3848 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3849 3850 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3851 rstart -= m; 3852 3853 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3854 for (i=0; i<m; i++) { 3855 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3856 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3857 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3858 } 3859 3860 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3861 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3862 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3863 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3864 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3865 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3866 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3867 } 3868 3869 /* numeric phase */ 3870 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3871 for (i=0; i<m; i++) { 3872 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3873 Ii = i + rstart; 3874 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3875 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3876 } 3877 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3878 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3879 PetscFunctionReturn(0); 3880 } 3881 3882 #undef __FUNCT__ 3883 #define __FUNCT__ "MatFileSplit" 3884 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3885 { 3886 PetscErrorCode 
ierr; 3887 PetscMPIInt rank; 3888 PetscInt m,N,i,rstart,nnz; 3889 size_t len; 3890 const PetscInt *indx; 3891 PetscViewer out; 3892 char *name; 3893 Mat B; 3894 const PetscScalar *values; 3895 3896 PetscFunctionBegin; 3897 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3898 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3899 /* Should this be the type of the diagonal block of A? */ 3900 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3901 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3902 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3903 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3904 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3905 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3906 for (i=0; i<m; i++) { 3907 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3908 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3909 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3910 } 3911 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3912 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3913 3914 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3915 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3916 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3917 sprintf(name,"%s.%d",outfile,rank); 3918 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3919 ierr = PetscFree(name);CHKERRQ(ierr); 3920 ierr = MatView(B,out);CHKERRQ(ierr); 3921 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3922 ierr = MatDestroy(&B);CHKERRQ(ierr); 3923 PetscFunctionReturn(0); 3924 } 3925 3926 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3927 #undef __FUNCT__ 3928 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3929 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3930 { 3931 PetscErrorCode ierr; 3932 Mat_Merge_SeqsToMPI *merge; 3933 PetscContainer container; 3934 3935 PetscFunctionBegin; 3936 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3937 if (container) { 3938 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3939 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3940 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3941 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3942 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3943 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3944 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3945 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3946 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3947 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3948 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3949 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3950 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3951 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3952 ierr = PetscFree(merge);CHKERRQ(ierr); 3953 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3954 } 3955 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3956 PetscFunctionReturn(0); 3957 } 3958 3959 #include <../src/mat/utils/freespace.h> 3960 #include <petscbt.h> 3961 3962 #undef __FUNCT__ 3963 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3964 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3965 { 3966 PetscErrorCode ierr; 3967 MPI_Comm comm; 3968 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3969 PetscMPIInt size,rank,taga,*len_s; 3970 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3971 PetscInt proc,m; 3972 PetscInt **buf_ri,**buf_rj; 3973 
PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3974 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3975 MPI_Request *s_waits,*r_waits; 3976 MPI_Status *status; 3977 MatScalar *aa=a->a; 3978 MatScalar **abuf_r,*ba_i; 3979 Mat_Merge_SeqsToMPI *merge; 3980 PetscContainer container; 3981 3982 PetscFunctionBegin; 3983 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3984 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3985 3986 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3987 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3988 3989 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3990 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3991 3992 bi = merge->bi; 3993 bj = merge->bj; 3994 buf_ri = merge->buf_ri; 3995 buf_rj = merge->buf_rj; 3996 3997 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3998 owners = merge->rowmap->range; 3999 len_s = merge->len_s; 4000 4001 /* send and recv matrix values */ 4002 /*-----------------------------*/ 4003 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4004 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4005 4006 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4007 for (proc=0,k=0; proc<size; proc++) { 4008 if (!len_s[proc]) continue; 4009 i = owners[proc]; 4010 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4011 k++; 4012 } 4013 4014 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4015 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4016 ierr = PetscFree(status);CHKERRQ(ierr); 4017 4018 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4019 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4020 4021 /* insert mat values of mpimat */ 4022 /*----------------------------*/ 4023 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4024 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4025 4026 for (k=0; k<merge->nrecv; k++) { 4027 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4028 nrows = *(buf_ri_k[k]); 4029 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4030 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4031 } 4032 4033 /* set values of ba */ 4034 m = merge->rowmap->n; 4035 for (i=0; i<m; i++) { 4036 arow = owners[rank] + i; 4037 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4038 bnzi = bi[i+1] - bi[i]; 4039 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4040 4041 /* add local non-zero vals of this proc's seqmat into ba */ 4042 anzi = ai[arow+1] - ai[arow]; 4043 aj = a->j + ai[arow]; 4044 aa = a->a + ai[arow]; 4045 nextaj = 0; 4046 for (j=0; nextaj<anzi; j++) { 4047 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4048 ba_i[j] += aa[nextaj++]; 4049 } 4050 } 4051 4052 /* add received vals into ba */ 4053 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4054 /* i-th row */ 4055 if (i == *nextrow[k]) { 4056 anzi = *(nextai[k]+1) - *nextai[k]; 4057 aj = buf_rj[k] + *(nextai[k]); 4058 aa = abuf_r[k] + *(nextai[k]); 4059 nextaj = 0; 4060 for (j=0; nextaj<anzi; j++) { 4061 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4062 ba_i[j] += aa[nextaj++]; 4063 } 4064 } 4065 nextrow[k]++; nextai[k]++; 4066 } 4067 } 4068 ierr = 
MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4069 } 4070 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4071 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4072 4073 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4074 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4075 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4076 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4077 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4078 PetscFunctionReturn(0); 4079 } 4080 4081 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4082 4083 #undef __FUNCT__ 4084 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4085 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4086 { 4087 PetscErrorCode ierr; 4088 Mat B_mpi; 4089 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4090 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4091 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4092 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4093 PetscInt len,proc,*dnz,*onz,bs,cbs; 4094 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4095 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4096 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4097 MPI_Status *status; 4098 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4099 PetscBT lnkbt; 4100 Mat_Merge_SeqsToMPI *merge; 4101 PetscContainer container; 4102 4103 PetscFunctionBegin; 4104 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4105 4106 /* make sure it is a PETSc comm */ 4107 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4108 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4110 4111 ierr = PetscNew(&merge);CHKERRQ(ierr); 4112 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4113 4114 /* determine row ownership */ 4115 /*---------------------------------------------------------*/ 4116 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4117 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4118 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4119 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4120 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4121 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4122 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4123 4124 m = merge->rowmap->n; 4125 owners = merge->rowmap->range; 4126 4127 /* determine the number of messages to send, their lengths */ 4128 /*---------------------------------------------------------*/ 4129 len_s = merge->len_s; 4130 4131 len = 0; /* length of buf_si[] */ 4132 merge->nsend = 0; 4133 for (proc=0; proc<size; proc++) { 4134 len_si[proc] = 0; 4135 if (proc == rank) { 4136 len_s[proc] = 0; 4137 } else { 4138 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4139 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4140 } 4141 if (len_s[proc]) { 4142 merge->nsend++; 4143 nrows = 0; 4144 for (i=owners[proc]; i<owners[proc+1]; i++) { 4145 if (ai[i+1] > ai[i]) nrows++; 4146 } 4147 len_si[proc] = 2*(nrows+1); 4148 len += len_si[proc]; 4149 } 4150 } 4151 4152 /* determine the number and length of messages to receive for ij-structure */ 4153 /*-------------------------------------------------------------------------*/ 4154 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4155 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4156 4157 /* post the Irecv of j-structure */ 4158 /*-------------------------------*/ 4159 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4160 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4161 4162 /* post the Isend of j-structure */ 4163 /*--------------------------------*/ 4164 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4165 4166 for (proc=0, k=0; proc<size; proc++) { 4167 if (!len_s[proc]) continue; 4168 i = owners[proc]; 4169 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4170 k++; 4171 } 4172 4173 /* receives and sends of j-structure are complete */ 4174 /*------------------------------------------------*/ 4175 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4176 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4177 4178 /* send and recv i-structure */ 4179 /*---------------------------*/ 4180 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4181 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4182 4183 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4184 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4185 for (proc=0,k=0; proc<size; proc++) { 4186 if (!len_s[proc]) continue; 4187 /* form outgoing message for i-structure: 4188 buf_si[0]: nrows to be sent 4189 [1:nrows]: row index (global) 4190 [nrows+1:2*nrows+1]: i-structure index 4191 */ 4192 /*-------------------------------------------*/ 4193 nrows = len_si[proc]/2 - 1; 4194 buf_si_i = buf_si + nrows+1; 4195 buf_si[0] = nrows; 4196 buf_si_i[0] = 0; 4197 nrows = 0; 4198 for (i=owners[proc]; i<owners[proc+1]; i++) { 4199 anzi = ai[i+1] - ai[i]; 4200 if (anzi) { 4201 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4202 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4203 nrows++; 4204 } 4205 } 4206 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4207 k++; 4208 buf_si += len_si[proc]; 4209 } 4210 4211 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4212 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4213 4214 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4215 for (i=0; i<merge->nrecv; i++) { 4216 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4217 } 4218 4219 ierr = PetscFree(len_si);CHKERRQ(ierr); 4220 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4221 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4222 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4223 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4224 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4225 ierr = PetscFree(status);CHKERRQ(ierr); 4226 4227 /* compute a local seq matrix in each processor */ 4228 /*----------------------------------------------*/ 4229 /* allocate bi array and free space for accumulating nonzero column info */ 4230 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4231 bi[0] = 0; 4232 4233 /* create and initialize a linked list */ 4234 nlnk = N+1; 4235 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4236 4237 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4238 len = ai[owners[rank+1]] - 
ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse
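     (MatCreateMPIAIJSumSeqAIJNumeric() retrieves this container again with PetscObjectQuery(),
      and MatDestroy_MPIAIJ_SeqsToMPI() frees its contents when B_mpi is destroyed)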
*/ 4326 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4327 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4328 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4329 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4330 *mpimat = B_mpi; 4331 4332 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4333 PetscFunctionReturn(0); 4334 } 4335 4336 #undef __FUNCT__ 4337 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4338 /*@C 4339 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4340 matrices from each processor 4341 4342 Collective on MPI_Comm 4343 4344 Input Parameters: 4345 + comm - the communicators the parallel matrix will live on 4346 . seqmat - the input sequential matrices 4347 . m - number of local rows (or PETSC_DECIDE) 4348 . n - number of local columns (or PETSC_DECIDE) 4349 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4350 4351 Output Parameter: 4352 . mpimat - the parallel matrix generated 4353 4354 Level: advanced 4355 4356 Notes: 4357 The dimensions of the sequential matrix in each processor MUST be the same. 4358 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4359 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4360 @*/ 4361 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4362 { 4363 PetscErrorCode ierr; 4364 PetscMPIInt size; 4365 4366 PetscFunctionBegin; 4367 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4368 if (size == 1) { 4369 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4370 if (scall == MAT_INITIAL_MATRIX) { 4371 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4372 } else { 4373 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4374 } 4375 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4376 PetscFunctionReturn(0); 4377 } 4378 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4379 if (scall == MAT_INITIAL_MATRIX) { 4380 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4381 } 4382 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4383 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4384 PetscFunctionReturn(0); 4385 } 4386 4387 #undef __FUNCT__ 4388 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4389 /*@ 4390 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4391 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4392 with MatGetSize() 4393 4394 Not Collective 4395 4396 Input Parameters: 4397 + A - the matrix 4398 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4399 4400 Output Parameter: 4401 . 
A_loc - the local sequential matrix generated 4402 4403 Level: developer 4404 4405 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4406 4407 @*/ 4408 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4409 { 4410 PetscErrorCode ierr; 4411 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4412 Mat_SeqAIJ *mat,*a,*b; 4413 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4414 MatScalar *aa,*ba,*cam; 4415 PetscScalar *ca; 4416 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4417 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4418 PetscBool match; 4419 MPI_Comm comm; 4420 PetscMPIInt size; 4421 4422 PetscFunctionBegin; 4423 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4424 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4425 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4426 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4427 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4428 4429 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4430 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4431 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4432 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4433 aa = a->a; ba = b->a; 4434 if (scall == MAT_INITIAL_MATRIX) { 4435 if (size == 1) { 4436 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4437 PetscFunctionReturn(0); 4438 } 4439 4440 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4441 ci[0] = 0; 4442 for (i=0; i<am; i++) { 4443 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4444 } 4445 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4446 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4447 k = 0; 4448 for (i=0; i<am; i++) { 4449 ncols_o = bi[i+1] - bi[i]; 4450 ncols_d = ai[i+1] - ai[i]; 4451 /* off-diagonal portion of A */ 4452 for (jo=0; jo<ncols_o; jo++) { 4453 col = cmap[*bj]; 4454 if (col >= cstart) break; 4455 cj[k] = col; bj++; 4456 ca[k++] = *ba++; 4457 } 4458 /* diagonal portion of A */ 4459 for (j=0; j<ncols_d; j++) { 4460 cj[k] = cstart + *aj++; 4461 ca[k++] = *aa++; 4462 } 4463 /* off-diagonal portion of A */ 4464 for (j=jo; j<ncols_o; j++) { 4465 cj[k] = cmap[*bj++]; 4466 ca[k++] = *ba++; 4467 } 4468 } 4469 /* put together the new matrix */ 4470 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4471 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4472 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4473 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4474 mat->free_a = PETSC_TRUE; 4475 mat->free_ij = PETSC_TRUE; 4476 mat->nonew = 0; 4477 } else if (scall == MAT_REUSE_MATRIX) { 4478 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4479 ci = mat->i; cj = mat->j; cam = mat->a; 4480 for (i=0; i<am; i++) { 4481 /* off-diagonal portion of A */ 4482 ncols_o = bi[i+1] - bi[i]; 4483 for (jo=0; jo<ncols_o; jo++) { 4484 col = cmap[*bj]; 4485 if (col >= cstart) break; 4486 *cam++ = *ba++; bj++; 4487 } 4488 /* diagonal portion of A */ 4489 ncols_d = ai[i+1] - ai[i]; 4490 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4491 /* off-diagonal portion of A */ 4492 for (j=jo; j<ncols_o; j++) { 4493 *cam++ = *ba++; bj++; 4494 } 4495 } 4496 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4497 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4498 PetscFunctionReturn(0); 4499 } 4500 4501 #undef __FUNCT__ 4502 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4503 /*@C 4504 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4505 4506 Not Collective 4507 4508 Input Parameters: 4509 + A - the matrix 4510 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4511 - row, col - index sets of rows and columns to extract (or NULL) 4512 4513 Output Parameter: 4514 . A_loc - the local sequential matrix generated 4515 4516 Level: developer 4517 4518 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4519 4520 @*/ 4521 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4522 { 4523 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4524 PetscErrorCode ierr; 4525 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4526 IS isrowa,iscola; 4527 Mat *aloc; 4528 PetscBool match; 4529 4530 PetscFunctionBegin; 4531 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4532 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4533 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4534 if (!row) { 4535 start = A->rmap->rstart; end = A->rmap->rend; 4536 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4537 } else { 4538 isrowa = *row; 4539 } 4540 if (!col) { 4541 start = A->cmap->rstart; 4542 cmap = a->garray; 4543 nzA = a->A->cmap->n; 4544 nzB = a->B->cmap->n; 4545 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4546 ncols = 0; 4547 for (i=0; i<nzB; i++) { 4548 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4549 else break; 4550 } 4551 imark = i; 4552 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4553 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4554 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4555 } else { 4556 iscola = *col; 4557 } 4558 if (scall != MAT_INITIAL_MATRIX) { 4559 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4560 aloc[0] = *A_loc; 4561 } 4562 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4563 *A_loc = aloc[0]; 4564 ierr = PetscFree(aloc);CHKERRQ(ierr); 4565 if (!row) { 4566 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4567 } 4568 if (!col) { 4569 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4570 } 4571 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4572 PetscFunctionReturn(0); 4573 } 4574 4575 #undef __FUNCT__ 4576 #define __FUNCT__ "MatGetBrowsOfAcols" 4577 /*@C 4578 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero 
columns of local A 4579 4580 Collective on Mat 4581 4582 Input Parameters: 4583 + A,B - the matrices in mpiaij format 4584 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4585 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4586 4587 Output Parameter: 4588 + rowb, colb - index sets of rows and columns of B to extract 4589 - B_seq - the sequential matrix generated 4590 4591 Level: developer 4592 4593 @*/ 4594 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4595 { 4596 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4597 PetscErrorCode ierr; 4598 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4599 IS isrowb,iscolb; 4600 Mat *bseq=NULL; 4601 4602 PetscFunctionBegin; 4603 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4604 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4605 } 4606 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4607 4608 if (scall == MAT_INITIAL_MATRIX) { 4609 start = A->cmap->rstart; 4610 cmap = a->garray; 4611 nzA = a->A->cmap->n; 4612 nzB = a->B->cmap->n; 4613 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4614 ncols = 0; 4615 for (i=0; i<nzB; i++) { /* row < local row index */ 4616 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4617 else break; 4618 } 4619 imark = i; 4620 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4621 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4622 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4623 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4624 } else { 4625 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4626 isrowb = *rowb; iscolb = *colb; 4627 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4628 bseq[0] = *B_seq; 4629 } 4630 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4631 *B_seq = bseq[0]; 4632 ierr = PetscFree(bseq);CHKERRQ(ierr); 4633 if (!rowb) { 4634 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4635 } else { 4636 *rowb = isrowb; 4637 } 4638 if (!colb) { 4639 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4640 } else { 4641 *colb = iscolb; 4642 } 4643 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4644 PetscFunctionReturn(0); 4645 } 4646 4647 #undef __FUNCT__ 4648 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4649 /* 4650 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4651 of the OFF-DIAGONAL portion of local A 4652 4653 Collective on Mat 4654 4655 Input Parameters: 4656 + A,B - the matrices in mpiaij format 4657 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4658 4659 Output Parameter: 4660 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4661 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4662 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4663 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4664 4665 Level: developer 4666 4667 */ 4668 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4669 { 4670 VecScatter_MPI_General *gen_to,*gen_from; 4671 PetscErrorCode ierr; 4672 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4673 Mat_SeqAIJ *b_oth; 4674 VecScatter ctx =a->Mvctx; 4675 MPI_Comm comm; 4676 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4677 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4678 PetscScalar *rvalues,*svalues; 4679 MatScalar *b_otha,*bufa,*bufA; 4680 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4681 MPI_Request *rwaits = NULL,*swaits = NULL; 4682 MPI_Status *sstatus,rstatus; 4683 PetscMPIInt jj,size; 4684 PetscInt *cols,sbs,rbs; 4685 PetscScalar *vals; 4686 4687 PetscFunctionBegin; 4688 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4689 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4690 if (size == 1) PetscFunctionReturn(0); 4691 4692 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4693 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4694 } 4695 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4696 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4697 4698 gen_to = (VecScatter_MPI_General*)ctx->todata; 4699 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4700 rvalues = gen_from->values; /* holds the length of receiving row */ 4701 svalues = gen_to->values; /* holds the length of sending row */ 4702 nrecvs = gen_from->n; 4703 nsends = gen_to->n; 4704 4705 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4706 srow = gen_to->indices; /* local row index to be sent */ 4707 sstarts = gen_to->starts; 4708 sprocs = gen_to->procs; 4709 sstatus = gen_to->sstatus; 4710 sbs = gen_to->bs; 4711 rstarts = gen_from->starts; 4712 rprocs = gen_from->procs; 4713 rbs = gen_from->bs; 4714 4715 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4716 if (scall == MAT_INITIAL_MATRIX) { 4717 /* i-array */ 4718 /*---------*/ 4719 /* post receives */ 4720 for (i=0; i<nrecvs; i++) { 4721 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4722 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4723 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4724 } 4725 4726 /* pack the outgoing message */ 4727 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4728 4729 sstartsj[0] = 0; 4730 rstartsj[0] = 0; 4731 len = 0; /* total length of j or a array to be sent */ 4732 k = 0; 4733 for (i=0; i<nsends; i++) { 4734 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4735 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4736 for (j=0; j<nrows; j++) { 4737 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4738 for (l=0; l<sbs; l++) { 4739 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4740 4741 rowlen[j*sbs+l] = ncols; 4742 4743 len += ncols; 4744 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4745 } 4746 k++; 4747 } 4748 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4749 4750 
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4751 } 4752 /* recvs and sends of i-array are completed */ 4753 i = nrecvs; 4754 while (i--) { 4755 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4756 } 4757 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4758 4759 /* allocate buffers for sending j and a arrays */ 4760 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4761 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4762 4763 /* create i-array of B_oth */ 4764 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4765 4766 b_othi[0] = 0; 4767 len = 0; /* total length of j or a array to be received */ 4768 k = 0; 4769 for (i=0; i<nrecvs; i++) { 4770 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4771 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4772 for (j=0; j<nrows; j++) { 4773 b_othi[k+1] = b_othi[k] + rowlen[j]; 4774 len += rowlen[j]; k++; 4775 } 4776 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4777 } 4778 4779 /* allocate space for j and a arrays of B_oth */ 4780 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4781 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4782 4783 /* j-array */ 4784 /*---------*/ 4785 /* post receives of j-array */ 4786 for (i=0; i<nrecvs; i++) { 4787 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4788 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4789 } 4790 4791 /* pack the outgoing message j-array */ 4792 k = 0; 4793 for (i=0; i<nsends; i++) { 4794 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4795 bufJ = bufj+sstartsj[i]; 4796 for (j=0; j<nrows; j++) { 4797 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4798 for (ll=0; ll<sbs; ll++) { 4799 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4800 for (l=0; l<ncols; l++) { 4801 *bufJ++ = cols[l]; 4802 } 4803 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4804 } 4805 } 4806 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4807 } 4808 4809 /* recvs and sends of j-array are completed */ 4810 i = nrecvs; 4811 while (i--) { 4812 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4813 } 4814 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4815 } else if (scall == MAT_REUSE_MATRIX) { 4816 sstartsj = *startsj_s; 4817 rstartsj = *startsj_r; 4818 bufa = *bufa_ptr; 4819 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4820 b_otha = b_oth->a; 4821 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "MatReuse value must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 4822 4823 /* a-array */ 4824 /*---------*/ 4825 /* post receives of a-array */ 4826 for (i=0; i<nrecvs; i++) { 4827 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4828 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4829 } 4830 4831 /* pack the outgoing message a-array */ 4832 k = 0; 4833 for (i=0; i<nsends; i++) { 4834 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4835 bufA = bufa+sstartsj[i]; 4836 for (j=0; j<nrows; j++) { 4837 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4838 for (ll=0; ll<sbs; ll++) { 4839 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4840 for (l=0; l<ncols; l++) { 4841 *bufA++ = vals[l]; 4842 } 4843 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4844 } 4845 } 4846 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4847 } 4848 /* recvs and sends of a-array are completed */ 4849 i = nrecvs; 4850 while (i--) { 4851 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4852 } 4853 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4854 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4855 4856 if (scall == MAT_INITIAL_MATRIX) { 4857 /* put together the new matrix */ 4858 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4859 4860 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4861 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4862 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4863 b_oth->free_a = PETSC_TRUE; 4864 b_oth->free_ij = PETSC_TRUE; 4865 b_oth->nonew = 0; 4866 4867 ierr = PetscFree(bufj);CHKERRQ(ierr); 4868 if (!startsj_s || !bufa_ptr) { 4869 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4870 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4871 } else { 4872 *startsj_s = sstartsj; 4873 *startsj_r = rstartsj; 4874 *bufa_ptr = bufa; 4875 } 4876 } 4877 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4878 PetscFunctionReturn(0); 4879 } 4880 4881 #undef __FUNCT__ 4882 #define __FUNCT__ "MatGetCommunicationStructs" 4883 /*@C 4884 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4885 4886 Not Collective 4887 4888 Input Parameters: 4889 . A - The matrix in mpiaij format 4890 4891 Output Parameter: 4892 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4893 . 
colmap - A map from global column index to local index into lvec 4894 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4895 4896 Level: developer 4897 4898 @*/ 4899 #if defined(PETSC_USE_CTABLE) 4900 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4901 #else 4902 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4903 #endif 4904 { 4905 Mat_MPIAIJ *a; 4906 4907 PetscFunctionBegin; 4908 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4909 PetscValidPointer(lvec, 2); 4910 PetscValidPointer(colmap, 3); 4911 PetscValidPointer(multScatter, 4); 4912 a = (Mat_MPIAIJ*) A->data; 4913 if (lvec) *lvec = a->lvec; 4914 if (colmap) *colmap = a->colmap; 4915 if (multScatter) *multScatter = a->Mvctx; 4916 PetscFunctionReturn(0); 4917 } 4918 4919 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4920 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4921 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4922 #if defined(PETSC_HAVE_ELEMENTAL) 4923 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4924 #endif 4925 4926 #undef __FUNCT__ 4927 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4928 /* 4929 Computes (B'*A')' since computing B*A directly is untenable 4930 4931 n p p 4932 ( ) ( ) ( ) 4933 m ( A ) * n ( B ) = m ( C ) 4934 ( ) ( ) ( ) 4935 4936 */ 4937 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4938 { 4939 PetscErrorCode ierr; 4940 Mat At,Bt,Ct; 4941 4942 PetscFunctionBegin; 4943 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4944 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4945 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4946 ierr = MatDestroy(&At);CHKERRQ(ierr); 4947 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4948 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4949 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4950 PetscFunctionReturn(0); 4951 } 4952 4953 #undef __FUNCT__ 4954 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4955 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4956 { 4957 PetscErrorCode ierr; 4958 PetscInt m=A->rmap->n,n=B->cmap->n; 4959 Mat Cmat; 4960 4961 PetscFunctionBegin; 4962 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4963 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4964 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4965 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4966 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4967 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4968 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4969 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4970 4971 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4972 4973 *C = Cmat; 4974 PetscFunctionReturn(0); 4975 } 4976 4977 /* ----------------------------------------------------------------*/ 4978 #undef __FUNCT__ 4979 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4980 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4981 { 4982 PetscErrorCode ierr; 4983 4984 PetscFunctionBegin; 4985 if (scall == MAT_INITIAL_MATRIX) { 4986 ierr = 
PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4987 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4988 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4989 } 4990 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4991 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4992 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4993 PetscFunctionReturn(0); 4994 } 4995 4996 /*MC 4997 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4998 4999 Options Database Keys: 5000 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5001 5002 Level: beginner 5003 5004 .seealso: MatCreateAIJ() 5005 M*/ 5006 5007 #undef __FUNCT__ 5008 #define __FUNCT__ "MatCreate_MPIAIJ" 5009 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5010 { 5011 Mat_MPIAIJ *b; 5012 PetscErrorCode ierr; 5013 PetscMPIInt size; 5014 5015 PetscFunctionBegin; 5016 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5017 5018 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5019 B->data = (void*)b; 5020 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5021 B->assembled = PETSC_FALSE; 5022 B->insertmode = NOT_SET_VALUES; 5023 b->size = size; 5024 5025 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5026 5027 /* build cache for off array entries formed */ 5028 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5029 5030 b->donotstash = PETSC_FALSE; 5031 b->colmap = 0; 5032 b->garray = 0; 5033 b->roworiented = PETSC_TRUE; 5034 5035 /* stuff used for matrix vector multiply */ 5036 b->lvec = NULL; 5037 b->Mvctx = NULL; 5038 5039 /* stuff for MatGetRow() */ 5040 b->rowindices = 0; 5041 b->rowvalues = 0; 5042 b->getrowactive = PETSC_FALSE; 5043 5044 /* flexible pointer used in CUSP/CUSPARSE classes */ 5045 b->spptr = NULL; 5046 5047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5057 #if defined(PETSC_HAVE_ELEMENTAL) 5058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5059 #endif 5060 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5063 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5064 PetscFunctionReturn(0); 5065 } 5066 5067 #undef __FUNCT__ 5068 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5069 /*@C 5070 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5071 and "off-diagonal" parts of the matrix in CSR format. 5072 5073 Collective on MPI_Comm 5074 5075 Input Parameters: 5076 + comm - MPI communicator 5077 . m - number of local rows (Cannot be PETSC_DECIDE) 5078 . n - This value should be the same as the local size used in creating the 5079 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 5080 it calculated if N is given). For square matrices n is almost always m. 5081 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5082 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5083 . i - row indices for "diagonal" portion of matrix 5084 . j - column indices for "diagonal" portion of matrix (local, i.e. relative to the first column of the "diagonal" portion) 5085 . a - matrix values for "diagonal" portion of matrix 5086 . oi - row indices for "off-diagonal" portion of matrix 5087 . oj - column indices for "off-diagonal" portion of matrix (global column numbers) 5088 - oa - matrix values for "off-diagonal" portion of matrix 5089 5090 Output Parameter: 5091 . mat - the matrix 5092 5093 Level: advanced 5094 5095 Notes: 5096 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5097 must free the arrays once the matrix has been destroyed and not before. 5098 5099 The i and j indices are 0 based 5100 5101 See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix 5102 5103 This sets local rows and cannot be used to set off-processor values. 5104 5105 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5106 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5107 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5108 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5109 keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5110 communication if it is known that only local entries will be set. 
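   As an illustration only (a hand-worked sketch, not taken from the PETSc examples), consider the 4x4 tridiagonal
   matrix with 2 on the diagonal and -1 on the off-diagonals, distributed over two processes that each own two rows
   and two columns. With the column-numbering conventions noted above (j local to the "diagonal" block, oj global),
   the split arrays and the call are
.vb
   rank 0:   i[]  = {0,2,4}    j[]  = {0,1,0,1}    a[]  = {2,-1,-1,2}
             oi[] = {0,0,1}    oj[] = {2}          oa[] = {-1}
   rank 1:   i[]  = {0,2,4}    j[]  = {0,1,0,1}    a[]  = {2,-1,-1,2}
             oi[] = {0,1,1}    oj[] = {1}          oa[] = {-1}

   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve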
5111 5112 .keywords: matrix, aij, compressed row, sparse, parallel 5113 5114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5115 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5116 C@*/ 5117 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5118 { 5119 PetscErrorCode ierr; 5120 Mat_MPIAIJ *maij; 5121 5122 PetscFunctionBegin; 5123 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5124 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5125 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5126 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5127 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5128 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5129 maij = (Mat_MPIAIJ*) (*mat)->data; 5130 5131 (*mat)->preallocated = PETSC_TRUE; 5132 5133 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5134 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5135 5136 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5137 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5138 5139 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5140 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5141 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5142 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5143 5144 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5145 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5146 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5147 PetscFunctionReturn(0); 5148 } 5149 5150 /* 5151 Special version for direct calls from Fortran 5152 */ 5153 #include <petsc-private/fortranimpl.h> 5154 5155 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5156 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5157 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5158 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5159 #endif 5160 5161 /* Change these macros so can be used in void function */ 5162 #undef CHKERRQ 5163 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5164 #undef SETERRQ2 5165 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5166 #undef SETERRQ3 5167 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5168 #undef SETERRQ 5169 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5170 5171 #undef __FUNCT__ 5172 #define __FUNCT__ "matsetvaluesmpiaij_" 5173 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5174 { 5175 Mat mat = *mmat; 5176 PetscInt m = *mm, n = *mn; 5177 InsertMode addv = *maddv; 5178 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5179 PetscScalar value; 5180 PetscErrorCode ierr; 5181 5182 MatCheckPreallocated(mat,1); 5183 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5184 5185 #if defined(PETSC_USE_DEBUG) 5186 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5187 #endif 5188 { 5189 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5190 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5191 PetscBool roworiented = aij->roworiented; 5192 5193 /* Some Variables required in the macro */ 5194 Mat A = aij->A; 5195 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5196 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5197 MatScalar *aa = a->a; 5198 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5199 Mat B = aij->B; 5200 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5201 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5202 MatScalar *ba = b->a; 5203 5204 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5205 PetscInt nonew = a->nonew; 5206 MatScalar *ap1,*ap2; 5207 5208 PetscFunctionBegin; 5209 for (i=0; i<m; i++) { 5210 if (im[i] < 0) continue; 5211 #if defined(PETSC_USE_DEBUG) 5212 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5213 #endif 5214 if (im[i] >= rstart && im[i] < rend) { 5215 row = im[i] - rstart; 5216 lastcol1 = -1; 5217 rp1 = aj + ai[row]; 5218 ap1 = aa + ai[row]; 5219 rmax1 = aimax[row]; 5220 nrow1 = ailen[row]; 5221 low1 = 0; 5222 high1 = nrow1; 5223 lastcol2 = -1; 5224 rp2 = bj + bi[row]; 5225 ap2 = ba + bi[row]; 5226 rmax2 = bimax[row]; 5227 nrow2 = bilen[row]; 5228 low2 = 0; 5229 high2 = nrow2; 5230 5231 for (j=0; j<n; j++) { 5232 if (roworiented) value = v[i*n+j]; 5233 else value = v[i+j*m]; 5234 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5235 if (in[j] >= cstart && in[j] < cend) { 5236 col = in[j] - cstart; 5237 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5238 } else if (in[j] < 0) continue; 5239 #if defined(PETSC_USE_DEBUG) 5240 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5241 #endif 5242 else { 5243 if (mat->was_assembled) { 5244 if (!aij->colmap) { 5245 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5246 } 5247 #if defined(PETSC_USE_CTABLE) 5248 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5249 col--; 5250 #else 5251 col = aij->colmap[in[j]] - 1; 5252 #endif 5253 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5254 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5255 col = in[j]; 5256 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5257 B = aij->B; 5258 b = (Mat_SeqAIJ*)B->data; 5259 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5260 rp2 = bj + bi[row]; 5261 ap2 = ba + bi[row]; 5262 rmax2 = bimax[row]; 5263 nrow2 = bilen[row]; 5264 low2 = 0; 5265 high2 = nrow2; 5266 bm = aij->B->rmap->n; 5267 ba = b->a; 5268 } 5269 } else col = in[j]; 5270 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5271 } 5272 } 5273 } else if (!aij->donotstash) { 5274 if (roworiented) { 5275 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5276 } else { 5277 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5278 } 5279 } 5280 } 5281 } 5282 PetscFunctionReturnVoid(); 5283 } 5284 5285
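/*
   Illustrative sketch (not a PETSc example program) of a typical calling sequence for MatCreateMPIAIJSumSeqAIJ(),
   documented earlier in this file: every process assembles a SeqAIJ matrix of the same global dimensions and the
   routine adds them into one MPIAIJ matrix. BuildLocalContribution() is a hypothetical user routine, not PETSc API.
   As noted in the manual page, seqmat is kept in the container attached to mpimat and is destroyed together with
   mpimat, so it is not destroyed separately here.

      Mat            seqmat,mpimat;
      PetscErrorCode ierr;

      ierr = BuildLocalContribution(&seqmat);CHKERRQ(ierr);
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
         .. use mpimat, then change the numerical values (but not the nonzero pattern) of seqmat ..
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
      ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/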