1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
/*
   Builds an index set (global numbering) of the locally owned rows that contain
   at least one stored value different from 0.0 in either the diagonal (A) or
   off-diagonal (B) block.  If no process has a zero row, *keptrows is left NULL.

   Collective (performs an MPI_Allreduce on the communicator of M).
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* first pass: count the local rows whose stored entries are all zero (cnt) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {   /* structurally empty row */
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;   /* nonzero in diagonal block: row is kept */
    }
    bb = b->a + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;   /* nonzero in off-diagonal block: row is kept */
    }
    cnt++;   /* all stored values were exactly zero */
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);   /* no zero rows anywhere: *keptrows stays NULL */
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  /* second pass: record the global indices of the rows with a nonzero entry */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
/*
   Sets the diagonal of Y from the vector D.  When Y is assembled and the local
   row and column ownership ranges coincide (the whole local diagonal lives in
   the diagonal block) the operation is delegated to the sequential block;
   otherwise the generic default implementation is used.
*/
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
/*
   Returns an index set (global numbering) of the locally owned rows whose
   diagonal entry is missing or zero; the search is delegated to the local
   diagonal block and the resulting local indices are shifted by rstart.
*/
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  /* shift local row numbers into the global numbering */
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
/*
   Computes the requested norm of every global column of A.  Each process
   accumulates the contributions of its local diagonal (A) and off-diagonal (B)
   blocks into a work array of global length, which is then combined across all
   processes with MPI_Allreduce (MAX for the infinity norm, SUM otherwise).
   norms must have room for N (global number of columns) entries on every rank.
*/
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* accumulate |a_ij|^2; the square root is taken after the reduction below */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* gmat (the global sequential matrix) is only significant on rank 0 */
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create the distributed matrix; block sizes are taken from gmat on rank 0 and broadcast */
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    /* prefix-sum the per-process row counts into ownership ranges */
    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;   /* ld[i] = entries strictly left of the diagonal block */
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* keep ld (per-row count of off-diagonal entries left of the diagonal block)
       so a later MAT_REUSE_MATRIX call can place values without resending indices */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       the incoming row data is ordered: left-of-diagonal B entries, A entries,
       right-of-diagonal B entries (ld[] gives the left counts) */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* trailing right-of-diagonal B entries of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash table: maps global column + 1 -> local column + 1 (shift keeps 0 free to mean "absent") */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense array over all global columns; entry 0 means "not present locally" */
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/* Insert or add value at (row,col) of the diagonal block A.  Requires the
   caller (MatSetValues_MPIAIJ) to have set up the per-row search state
   rp1/ap1/low1/high1/nrow1/rmax1/lastcol1 and nonew; jumps to a_noinsert when
   the value is dropped or an existing location was updated. */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value; \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B,
   using the rp2/ap2/low2/high2/nrow2/rmax2/lastcol2 search state. */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Replaces every stored value of one locally owned row (global index row).
   v must list the row's stored entries in global column order: off-diagonal
   entries left of the diagonal block, then the diagonal block, then
   off-diagonal entries to the right.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr =
PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts or adds a logically dense block of values.  Locally owned rows go
   directly into the diagonal (A) or off-diagonal (B) block via the
   MatSetValues_SeqAIJ_{A,B}_Private() macros; rows owned by other processes
   are stashed for communication during assembly (unless donotstash is set).
   Negative row or column indices are silently skipped.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the binary-search state for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* column not in the current off-diagonal pattern: disassemble so it can grow */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* off-process row: stash it for MatAssemblyBegin/End communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves values at the given (row,column) positions; only locally owned
   rows may be queried.  Off-diagonal positions that are not stored locally
   are returned as 0.0; negative indices are skipped.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr =
MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   Begins assembly: checks that all processes agree on the insert mode and
   starts the communication of stashed off-process entries.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains stashed off-process values into the local
   blocks, assembles A and B, handles disassembly/reassembly of the
   off-diagonal block when another process disassembled, and (on final
   assembly) sets up the scatter context and the global nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* invalidate cached row work space and cached diagonal */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
/* Zeroes all stored values of both the diagonal and off-diagonal blocks. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeroes the given global rows, optionally placing diag on the diagonal and
   fixing the right-hand side b from x.  Row indices may refer to rows owned
   by other processes (unless MAT_NO_OFF_PROC_ZERO_ROWS is set); a PetscSF
   delivers each requested row to its owner.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank  = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    /* square local diagonal block: the sequential MatZeroRows can set the diagonal */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-square local diagonal block: zero the rows, then insert diag entries one by one */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
/*
   Zeroes the given global rows and the matching columns, optionally placing
   diag on the diagonal and adjusting the right-hand side b from x.  Row
   indices may refer to rows owned by other processes; a PetscSF delivers each
   index to its owner, and a scattered mask vector identifies which ghost
   columns of the off-diagonal block must be cleared.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows,
PETSC_OWN_POINTER);CHKERRQ(ierr); 866 /* Collect flags for rows to be zeroed */ 867 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 868 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 869 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 870 /* Compress and put in row numbers */ 871 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 872 /* zero diagonal part of matrix */ 873 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 874 /* handle off diagonal part of matrix */ 875 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 876 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 877 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 878 for (i=0; i<len; i++) bb[lrows[i]] = 1; 879 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 880 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 883 if (x) { 884 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 887 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 888 } 889 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 890 /* remove zeroed rows of off diagonal matrix */ 891 ii = aij->i; 892 for (i=0; i<len; i++) { 893 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 894 } 895 /* loop over all elements of off process part of matrix zeroing removed columns*/ 896 if (aij->compressedrow.use) { 897 m = aij->compressedrow.nrows; 898 ii = aij->compressedrow.i; 899 ridx = aij->compressedrow.rindex; 900 for (i=0; i<m; i++) { 901 n = ii[i+1] - ii[i]; 902 aj = aij->j + ii[i]; 903 aa = aij->a + ii[i]; 904 905 for (j=0; j<n; j++) { 906 if 
(PetscAbsScalar(mask[*aj])) { 907 if (b) bb[*ridx] -= *aa*xx[*aj]; 908 *aa = 0.0; 909 } 910 aa++; 911 aj++; 912 } 913 ridx++; 914 } 915 } else { /* do not use compressed row format */ 916 m = l->B->rmap->n; 917 for (i=0; i<m; i++) { 918 n = ii[i+1] - ii[i]; 919 aj = aij->j + ii[i]; 920 aa = aij->a + ii[i]; 921 for (j=0; j<n; j++) { 922 if (PetscAbsScalar(mask[*aj])) { 923 if (b) bb[i] -= *aa*xx[*aj]; 924 *aa = 0.0; 925 } 926 aa++; 927 aj++; 928 } 929 } 930 } 931 if (x) { 932 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 933 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 934 } 935 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 936 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 937 ierr = PetscFree(lrows);CHKERRQ(ierr); 938 939 /* only change matrix nonzero state if pattern was allowed to be changed */ 940 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 941 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 942 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 943 } 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatMult_MPIAIJ" 949 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 950 { 951 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 952 PetscErrorCode ierr; 953 PetscInt nt; 954 955 PetscFunctionBegin; 956 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 957 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 958 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 960 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat 
A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forward to the sequential diagonal block a->A */
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/*
   MatMultAdd_MPIAIJ - zz = yy + A*xx, overlapping the scatter of off-process
   entries of xx with the local diagonal-block multiply-add.
*/
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   MatMultTranspose_MPIAIJ - yy = A^T*xx. The off-diagonal transpose product is
   computed first and scattered in reverse; the ordering of Begin/End relative to
   the local multiply depends on whether the scatter's Begin and End are merged.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   MatIsTranspose_MPIAIJ - sets *f to PETSC_TRUE if Bmat is the transpose of Amat
   to within tol. First checks the cheap local condition (diagonal blocks are
   transposes of each other); only if that holds does it gather the off-diagonal
   blocks via MatGetSubMatrices and compare those.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data;
  Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);   /* sequential: the diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  /* notme = all global indices outside this process's ownership range [first,last) */
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/*
   MatMultTransposeAdd_MPIAIJ - zz = yy + A^T*xx, overlapping the reverse scatter
   of the off-diagonal transpose product with the local multiply-transpose-add.
*/
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* MatScale_MPIAIJ - A = aa*A, applied to both the diagonal and off-diagonal blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{ 1119 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1120 PetscErrorCode ierr; 1121 1122 PetscFunctionBegin; 1123 #if defined(PETSC_USE_LOG) 1124 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1125 #endif 1126 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1127 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1128 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1129 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1130 #if defined(PETSC_USE_CTABLE) 1131 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1132 #else 1133 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1134 #endif 1135 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1136 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1137 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1138 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1139 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1140 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1141 1142 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1145 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1146 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1151 #if defined(PETSC_HAVE_ELEMENTAL) 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1153 #endif 1154 
PetscFunctionReturn(0); 1155 } 1156 1157 #undef __FUNCT__ 1158 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1159 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1160 { 1161 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1162 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1163 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1164 PetscErrorCode ierr; 1165 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1166 int fd; 1167 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1168 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1169 PetscScalar *column_values; 1170 PetscInt message_count,flowcontrolcount; 1171 FILE *file; 1172 1173 PetscFunctionBegin; 1174 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1175 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1176 nz = A->nz + B->nz; 1177 if (!rank) { 1178 header[0] = MAT_FILE_CLASSID; 1179 header[1] = mat->rmap->N; 1180 header[2] = mat->cmap->N; 1181 1182 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1183 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1184 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1185 /* get largest number of rows any processor has */ 1186 rlen = mat->rmap->n; 1187 range = mat->rmap->range; 1188 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1189 } else { 1190 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1191 rlen = mat->rmap->n; 1192 } 1193 1194 /* load up the local row counts */ 1195 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1196 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1197 1198 /* store the row lengths to the file */ 1199 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1200 if (!rank) { 1201 ierr = 
PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1202 for (i=1; i<size; i++) { 1203 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1204 rlen = range[i+1] - range[i]; 1205 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1206 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1207 } 1208 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1209 } else { 1210 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1211 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1213 } 1214 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1215 1216 /* load up the local column indices */ 1217 nzmax = nz; /* th processor needs space a largest processor needs */ 1218 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1220 cnt = 0; 1221 for (i=0; i<mat->rmap->n; i++) { 1222 for (j=B->i[i]; j<B->i[i+1]; j++) { 1223 if ((col = garray[B->j[j]]) > cstart) break; 1224 column_indices[cnt++] = col; 1225 } 1226 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1227 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1228 } 1229 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1230 1231 /* store the column indices to the file */ 1232 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1233 if (!rank) { 1234 MPI_Status status; 1235 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1236 for (i=1; i<size; i++) { 1237 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1238 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1239 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1240 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1241 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1242 } 1243 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1244 } else { 1245 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1246 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1247 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1248 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1249 } 1250 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1251 1252 /* load up the local column values */ 1253 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1254 cnt = 0; 1255 for (i=0; i<mat->rmap->n; i++) { 1256 for (j=B->i[i]; j<B->i[i+1]; j++) { 1257 if (garray[B->j[j]] > cstart) break; 1258 column_values[cnt++] = B->a[j]; 1259 } 1260 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1261 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1262 } 1263 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1264 1265 /* store the column values to the file */ 1266 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1267 if (!rank) { 1268 MPI_Status status; 1269 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1270 for (i=1; i<size; i++) { 1271 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1272 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1273 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1274 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1275 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1276 } 1277 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1278 } else { 1279 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1280 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1281 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1282 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1283 } 1284 ierr = PetscFree(column_values);CHKERRQ(ierr); 1285 1286 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1287 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #include <petscdraw.h> 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1294 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1295 { 1296 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1297 PetscErrorCode ierr; 1298 PetscMPIInt rank = aij->rank,size = aij->size; 1299 PetscBool isdraw,iascii,isbinary; 1300 PetscViewer sviewer; 1301 PetscViewerFormat format; 1302 1303 PetscFunctionBegin; 1304 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1305 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1306 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1307 if 
(iascii) { 1308 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1309 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1310 MatInfo info; 1311 PetscBool inodes; 1312 1313 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1314 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1315 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1317 if (!inodes) { 1318 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1319 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1320 } else { 1321 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1322 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1323 } 1324 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1325 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1326 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1327 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1328 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1329 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1330 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1331 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1334 PetscInt inodecount,inodelimit,*inodes; 1335 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1336 if (inodes) { 1337 ierr = PetscViewerASCIIPrintf(viewer,"using 
I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1338 } else { 1339 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1340 } 1341 PetscFunctionReturn(0); 1342 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1343 PetscFunctionReturn(0); 1344 } 1345 } else if (isbinary) { 1346 if (size == 1) { 1347 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1348 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1349 } else { 1350 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1351 } 1352 PetscFunctionReturn(0); 1353 } else if (isdraw) { 1354 PetscDraw draw; 1355 PetscBool isnull; 1356 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1357 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1358 } 1359 1360 { 1361 /* assemble the entire matrix onto first processor. */ 1362 Mat A; 1363 Mat_SeqAIJ *Aloc; 1364 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1365 MatScalar *a; 1366 1367 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1368 if (!rank) { 1369 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1370 } else { 1371 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1372 } 1373 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1374 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1375 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1376 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1377 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1378 1379 /* copy over the A part */ 1380 Aloc = (Mat_SeqAIJ*)aij->A->data; 1381 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1382 row = mat->rmap->rstart; 1383 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1384 for (i=0; i<m; i++) { 1385 ierr = 
MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1386 row++; 1387 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1388 } 1389 aj = Aloc->j; 1390 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1391 1392 /* copy over the B part */ 1393 Aloc = (Mat_SeqAIJ*)aij->B->data; 1394 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1395 row = mat->rmap->rstart; 1396 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1397 ct = cols; 1398 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1399 for (i=0; i<m; i++) { 1400 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1401 row++; 1402 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1403 } 1404 ierr = PetscFree(ct);CHKERRQ(ierr); 1405 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1406 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1407 /* 1408 Everyone has to call to draw the matrix since the graphics waits are 1409 synchronized across all processors that share the PetscDraw object 1410 */ 1411 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1412 if (!rank) { 1413 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1414 } 1415 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1416 ierr = MatDestroy(&A);CHKERRQ(ierr); 1417 } 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatView_MPIAIJ" 1423 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1424 { 1425 PetscErrorCode ierr; 1426 PetscBool iascii,isdraw,issocket,isbinary; 1427 1428 PetscFunctionBegin; 1429 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1430 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1431 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1432 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1433 if 
(iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
/*
   MatSOR_MPIAIJ - block Jacobi / local SOR: each outer iteration scatters the
   current xx into mat->lvec, forms bb1 = bb - B*x (the rhs with the off-process
   coupling moved to the right-hand side), and runs the requested sequential
   sweep on the diagonal block. The Eisenstat variant is handled separately.
   True parallel (global ordering) SOR is not supported.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless a single zero-initial-guess iteration can work on bb directly;
     note the precedence: `~flag & SOR_ZERO_INITIAL_GUESS` is nonzero iff the bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration: x is zero so B*x is zero; sweep directly on bb */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* diagonal cached lazily for the VecPointwiseMult fallback below */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/*
   MatPermute_MPIAIJ - B = P*A*Q for row permutation rowp and column permutation
   colp. Inverts both permutations with PetscSF reductions to find the destination
   of every locally owned row/column, counts the diagonal/off-diagonal nonzeros of
   each destination row for exact preallocation, then copies the entries with
   MatSetValues.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1571 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1572 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1573 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1574 1575 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1576 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1577 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1578 1579 /* Find out where my gcols should go */ 1580 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1581 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1582 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1583 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1584 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1585 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1586 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1587 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1588 1589 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1590 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1591 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) { 1593 PetscInt row = rdest[i],rowner; 1594 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1595 for (j=ai[i]; j<ai[i+1]; j++) { 1596 PetscInt cowner,col = cdest[aj[j]]; 1597 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1598 if (rowner == cowner) dnnz[i]++; 1599 else onnz[i]++; 1600 } 1601 for (j=bi[i]; j<bi[i+1]; j++) { 1602 PetscInt cowner,col = gcdest[bj[j]]; 1603 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1604 if (rowner == cowner) dnnz[i]++; 1605 else onnz[i]++; 1606 } 1607 } 1608 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1609 ierr = 
PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1610 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1611 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1612 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1613 1614 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1615 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1616 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1617 for (i=0; i<m; i++) { 1618 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1619 PetscInt j0,rowlen; 1620 rowlen = ai[i+1] - ai[i]; 1621 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1622 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1623 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1624 } 1625 rowlen = bi[i+1] - bi[i]; 1626 for (j0=j=0; j<rowlen; j0=j) { 1627 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1628 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1629 } 1630 } 1631 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1632 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1633 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1634 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1635 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1636 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1637 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1638 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1639 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1640 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1641 *B = Aperm; 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1647 
/*
  MatGetInfo_MPIAIJ - sums the statistics of the diagonal (A) and off-diagonal (B)
  sequential blocks; for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM the five summed fields are
  reduced across the matrix's communicator.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A    = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
  MatSetOption_MPIAIJ - forwards options to the sequential blocks.  Nonzero-pattern
  options go to both A and B; symmetry-type options only make sense for the square
  diagonal block A; a few options are handled locally (stash, orientation, SPD).
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) { /* SPD implies (structural) symmetry */
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
  MatGetRow_MPIAIJ - returns one locally owned row, merging the diagonal block A
  (local column indices shifted by cstart) and the off-diagonal block B (compressed
  column indices mapped through garray) into a single array sorted by global column.
  The merged output lives in the internal mat->rowvalues / mat->rowindices buffers,
  sized once here for the longest local row; MatRestoreRow_MPIAIJ must be called
  before the next MatGetRow.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* Request only the outputs the caller asked for from the sequential blocks */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of B entries whose global column precedes the diagonal block */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* MatRestoreRow_MPIAIJ - ends the MatGetRow access; only clears the active flag. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
  MatNorm_MPIAIJ - Frobenius, 1- and infinity-norm of a parallel AIJ matrix,
  accumulating over the A and B blocks and reducing across ranks.  The 2-norm is
  not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr   = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
      ierr   = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
      *norm  = 0.0;
      v = amat->a; jj = amat->j;
for (j=0; j<amat->nz; j++) {
        /* diagonal block: local column index + cstart gives the global column */
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal block: compressed column index mapped through garray */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
  MatTranspose_MPIAIJ - forms the transpose by inserting each local row of A and B
  (with globalized column indices) as a column of the result via MatSetValues().
  Note: Aloc->j is shifted by cstart in place to produce global indices and shifted
  back afterwards -- statement order here matters.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data,*Bloc = (Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    /* insert local row i of A as (part of) column 'row' of the transpose */
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); /* in-place: fold B back into A's header */
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
  MatDiagonalScale_MPIAIJ - computes diag(ll)*mat*diag(rr).  The right-scaling
  entries for the off-diagonal block are ghosted via Mvctx; the scatter is begun
  before the local scaling so communication overlaps computation.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - clears the factored state of the diagonal block. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
  MatEqual_MPIAIJ - compares the A and B blocks locally, then takes a logical AND
  across the communicator so all ranks return the same answer.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIAIJ"
/* MatSetUp_MPIAIJ - default setup: preallocate with default parameters. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
/*
  MatAXPYGetPreallocation_MPIX_private - merge-counts the union of the column
  patterns of two CSR structures row by row.  Columns are compared after mapping
  through the local-to-global arrays xltog/yltog, so the same routine serves both
  diagonal and off-diagonal blocks.  Assumes each row's columns are sorted.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;                  /* Tail of Y beyond the last X column */
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N; /* Y is a sequential block here, so rmap->N is its local row count */
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
  MatAXPY_MPIAIJ - Y += a*X.  With SAME_NONZERO_PATTERN the value arrays are
  combined directly with BLAS axpy; with SUBSET it defers to MatAXPY_Basic; with
  different patterns a new matrix with the union pattern is preallocated and Y's
  header is replaced by it.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
/* MatConjugate_MPIAIJ - conjugates both blocks; a no-op for real scalars. */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PBGL)

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map->hpp> /* NOTE(review): "->hpp" looks like a typo for ".hpp"; this PBGL path likely fails to compile -- verify when PETSC_HAVE_PBGL is enabled */

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* NOTE(review): the four calls below discard ierr without CHKERRQ -- errors here are silently ignored */
  ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
  ierr = PetscContainerSetPointer(c, lgraph_p);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
  ierr = PetscContainerDestroy(&c);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter
Gottschling <pgottsch@osl.iu.edu> 2306 */ 2307 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2308 { 2309 namespace graph_dist = boost::graph::distributed; 2310 2311 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2312 lgraph_type *lgraph_p; 2313 PetscContainer c; 2314 PetscErrorCode ierr; 2315 2316 PetscFunctionBegin; 2317 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2318 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2319 ierr = VecCopy(b, x);CHKERRQ(ierr); 2320 2321 PetscScalar *array_x; 2322 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2323 PetscInt sx; 2324 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2325 2326 PetscScalar *array_b; 2327 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2328 PetscInt sb; 2329 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2330 2331 lgraph_type& level_graph = *lgraph_p; 2332 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2333 2334 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2335 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2336 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2337 2338 typedef boost::iterator_property_map<array_ref_type::iterator, 2339 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2340 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2341 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2342 2343 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2344 PetscFunctionReturn(0); 2345 } 2346 #endif 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2350 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2351 { 2352 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2353 PetscErrorCode ierr; 2354 PetscInt i,*idxb = 0; 2355 PetscScalar *va,*vb; 2356 Vec vtmp; 2357 2358 PetscFunctionBegin; 2359 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2360 
ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2361 if (idx) { 2362 for (i=0; i<A->rmap->n; i++) { 2363 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2364 } 2365 } 2366 2367 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2368 if (idx) { 2369 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2370 } 2371 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2372 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2373 2374 for (i=0; i<A->rmap->n; i++) { 2375 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2376 va[i] = vb[i]; 2377 if (idx) idx[i] = a->garray[idxb[i]]; 2378 } 2379 } 2380 2381 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2382 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2383 ierr = PetscFree(idxb);CHKERRQ(ierr); 2384 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2391 { 2392 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2393 PetscErrorCode ierr; 2394 PetscInt i,*idxb = 0; 2395 PetscScalar *va,*vb; 2396 Vec vtmp; 2397 2398 PetscFunctionBegin; 2399 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2400 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2401 if (idx) { 2402 for (i=0; i<A->cmap->n; i++) { 2403 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2404 } 2405 } 2406 2407 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2408 if (idx) { 2409 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2410 } 2411 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2412 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2413 2414 for (i=0; i<A->rmap->n; i++) { 2415 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2416 va[i] = vb[i]; 2417 if (idx) idx[i] = a->garray[idxb[i]]; 2418 } 2419 } 2420 2421 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2422 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2423 ierr = PetscFree(idxb);CHKERRQ(ierr); 2424 ierr = 
VecDestroy(&vtmp);CHKERRQ(ierr); 2425 PetscFunctionReturn(0); 2426 } 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2430 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2431 { 2432 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2433 PetscInt n = A->rmap->n; 2434 PetscInt cstart = A->cmap->rstart; 2435 PetscInt *cmap = mat->garray; 2436 PetscInt *diagIdx, *offdiagIdx; 2437 Vec diagV, offdiagV; 2438 PetscScalar *a, *diagA, *offdiagA; 2439 PetscInt r; 2440 PetscErrorCode ierr; 2441 2442 PetscFunctionBegin; 2443 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2444 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2445 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2446 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2447 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2448 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2449 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2450 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2451 for (r = 0; r < n; ++r) { 2452 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2453 a[r] = diagA[r]; 2454 idx[r] = cstart + diagIdx[r]; 2455 } else { 2456 a[r] = offdiagA[r]; 2457 idx[r] = cmap[offdiagIdx[r]]; 2458 } 2459 } 2460 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2461 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2462 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2463 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2464 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2465 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2466 PetscFunctionReturn(0); 2467 } 2468 2469 #undef __FUNCT__ 2470 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2471 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2472 { 2473 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2474 PetscInt n = A->rmap->n; 2475 PetscInt cstart = A->cmap->rstart; 2476 PetscInt *cmap = mat->garray; 2477 PetscInt 
*diagIdx, *offdiagIdx; 2478 Vec diagV, offdiagV; 2479 PetscScalar *a, *diagA, *offdiagA; 2480 PetscInt r; 2481 PetscErrorCode ierr; 2482 2483 PetscFunctionBegin; 2484 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2485 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2486 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2487 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2488 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2489 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2490 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2491 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2492 for (r = 0; r < n; ++r) { 2493 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2494 a[r] = diagA[r]; 2495 idx[r] = cstart + diagIdx[r]; 2496 } else { 2497 a[r] = offdiagA[r]; 2498 idx[r] = cmap[offdiagIdx[r]]; 2499 } 2500 } 2501 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2502 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2503 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2504 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2505 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2506 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2507 PetscFunctionReturn(0); 2508 } 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2512 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2513 { 2514 PetscErrorCode ierr; 2515 Mat *dummy; 2516 2517 PetscFunctionBegin; 2518 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2519 *newmat = *dummy; 2520 ierr = PetscFree(dummy);CHKERRQ(ierr); 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2526 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2527 { 2528 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2529 PetscErrorCode ierr; 2530 2531 PetscFunctionBegin; 2532 ierr 
 = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIAIJ"
/* Fills both the diagonal and off-diagonal blocks with random values and reassembles. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ; the slot numbers in the comments are the
   indices into struct _MatOps and must match include/petsc-private/matimpl.h.
   A 0 entry means the operation is not supported by this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       MatSolve_MPIAIJ,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       0,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       MatSetColoring_MPIAIJ,
                                       0,
                                       MatSetValuesAdifor_MPIAIJ,
                                /*75*/ MatFDColoringApply_AIJ,
                                       0,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       0,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesParallel_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*139*/0,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

#undef __FUNCT__
#define __FUNCT__
"MatStoreValues_MPIAIJ"
/* Saves a copy of the numerical values of both blocks (restored by MatRetrieveValues_MPIAIJ). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restores the numerical values previously saved with MatStoreValues_MPIAIJ(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   Type-specific implementation behind MatMPIAIJSetPreallocation(): lazily
   creates the local diagonal block b->A (m x n_local) and the off-diagonal
   block b->B (m x N_global) as MATSEQAIJ, then preallocates each with the
   d_* and o_* hints respectively.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    /* b->B is m x N (global column count); unused columns are compressed out at assembly */
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Creates a new MATMPIAIJ with the same layout as matin, copying values when
   cpvalues requests it; also duplicates the column map, garray, local work
   vector, and scatter context so the duplicate is immediately usable.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices
 = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}



#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   Loads a matrix from a PETSc binary viewer.  Rank 0 reads the header, row
   lengths, column indices and values from the file and streams each other
   process its portion with MPIULong_Send(); the receivers preallocate and
   insert with MatSetValues_MPIAIJ().
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset=1;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  /* sizesset tracks whether the caller fixed any of the sizes before MatLoad */
  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  /* turn per-process counts into ownership offsets; rowners[1] already equals
     rank 0's count so the prefix sum starts at i=2 */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* split total row lengths into diagonal part for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore full row lengths for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
   Extracts the parallel submatrix given by isrow/iscol.  Gathers the column
   IS onto every process (cached on the result under "ISAllGather" so a
   MAT_REUSE_MATRIX call can find it) and delegates to
   MatGetSubMatrix_MPIAIJ_Private().
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
3051 PetscErrorCode ierr; 3052 IS iscol_local; 3053 PetscInt csize; 3054 3055 PetscFunctionBegin; 3056 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3057 if (call == MAT_REUSE_MATRIX) { 3058 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3059 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3060 } else { 3061 PetscInt cbs; 3062 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3063 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3064 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3065 } 3066 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3067 if (call == MAT_INITIAL_MATRIX) { 3068 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3069 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3070 } 3071 PetscFunctionReturn(0); 3072 } 3073 3074 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3075 #undef __FUNCT__ 3076 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3077 /* 3078 Not great since it makes two copies of the submatrix, first an SeqAIJ 3079 in local and then by concatenating the local matrices the end result. 3080 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3081 3082 Note: This requires a sequential iscol with all indices. 
3083 */ 3084 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3085 { 3086 PetscErrorCode ierr; 3087 PetscMPIInt rank,size; 3088 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3089 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3090 PetscBool allcolumns, colflag; 3091 Mat M,Mreuse; 3092 MatScalar *vwork,*aa; 3093 MPI_Comm comm; 3094 Mat_SeqAIJ *aij; 3095 3096 PetscFunctionBegin; 3097 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3098 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3099 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3100 3101 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3102 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3103 if (colflag && ncol == mat->cmap->N) { 3104 allcolumns = PETSC_TRUE; 3105 } else { 3106 allcolumns = PETSC_FALSE; 3107 } 3108 if (call == MAT_REUSE_MATRIX) { 3109 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3110 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3111 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3112 } else { 3113 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3114 } 3115 3116 /* 3117 m - number of local rows 3118 n - number of columns (same on all processors) 3119 rstart - first row in new global matrix generated 3120 */ 3121 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3122 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3123 if (call == MAT_INITIAL_MATRIX) { 3124 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3125 ii = aij->i; 3126 jj = aij->j; 3127 3128 /* 3129 Determine the number of non-zeros in the diagonal and off-diagonal 3130 portions of the matrix in order to do correct preallocation 3131 */ 3132 3133 /* first get start and end of "diagonal" 
columns */ 3134 if (csize == PETSC_DECIDE) { 3135 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3136 if (mglobal == n) { /* square matrix */ 3137 nlocal = m; 3138 } else { 3139 nlocal = n/size + ((n % size) > rank); 3140 } 3141 } else { 3142 nlocal = csize; 3143 } 3144 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3145 rstart = rend - nlocal; 3146 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3147 3148 /* next, compute all the lengths */ 3149 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3150 olens = dlens + m; 3151 for (i=0; i<m; i++) { 3152 jend = ii[i+1] - ii[i]; 3153 olen = 0; 3154 dlen = 0; 3155 for (j=0; j<jend; j++) { 3156 if (*jj < rstart || *jj >= rend) olen++; 3157 else dlen++; 3158 jj++; 3159 } 3160 olens[i] = olen; 3161 dlens[i] = dlen; 3162 } 3163 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3164 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3165 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3166 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3167 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3168 ierr = PetscFree(dlens);CHKERRQ(ierr); 3169 } else { 3170 PetscInt ml,nl; 3171 3172 M = *newmat; 3173 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3174 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3175 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3176 /* 3177 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3178 rather than the slower MatSetValues(). 
3179 */ 3180 M->was_assembled = PETSC_TRUE; 3181 M->assembled = PETSC_FALSE; 3182 } 3183 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3184 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3185 ii = aij->i; 3186 jj = aij->j; 3187 aa = aij->a; 3188 for (i=0; i<m; i++) { 3189 row = rstart + i; 3190 nz = ii[i+1] - ii[i]; 3191 cwork = jj; jj += nz; 3192 vwork = aa; aa += nz; 3193 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3194 } 3195 3196 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3197 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3198 *newmat = M; 3199 3200 /* save submatrix used in processor for next request */ 3201 if (call == MAT_INITIAL_MATRIX) { 3202 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3203 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3204 } 3205 PetscFunctionReturn(0); 3206 } 3207 3208 #undef __FUNCT__ 3209 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3210 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3211 { 3212 PetscInt m,cstart, cend,j,nnz,i,d; 3213 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3214 const PetscInt *JJ; 3215 PetscScalar *values; 3216 PetscErrorCode ierr; 3217 3218 PetscFunctionBegin; 3219 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3220 3221 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3222 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3223 m = B->rmap->n; 3224 cstart = B->cmap->rstart; 3225 cend = B->cmap->rend; 3226 rstart = B->rmap->rstart; 3227 3228 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3229 3230 #if defined(PETSC_USE_DEBUGGING) 3231 for (i=0; i<m; i++) { 3232 nnz = Ii[i+1]- Ii[i]; 3233 JJ = J + Ii[i]; 3234 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3235 if (nnz && (JJ[0] < 0)) 
SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3236 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3237 } 3238 #endif 3239 3240 for (i=0; i<m; i++) { 3241 nnz = Ii[i+1]- Ii[i]; 3242 JJ = J + Ii[i]; 3243 nnz_max = PetscMax(nnz_max,nnz); 3244 d = 0; 3245 for (j=0; j<nnz; j++) { 3246 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3247 } 3248 d_nnz[i] = d; 3249 o_nnz[i] = nnz - d; 3250 } 3251 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3252 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3253 3254 if (v) values = (PetscScalar*)v; 3255 else { 3256 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3257 } 3258 3259 for (i=0; i<m; i++) { 3260 ii = i + rstart; 3261 nnz = Ii[i+1]- Ii[i]; 3262 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3263 } 3264 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3266 3267 if (!v) { 3268 ierr = PetscFree(values);CHKERRQ(ierr); 3269 } 3270 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3271 PetscFunctionReturn(0); 3272 } 3273 3274 #undef __FUNCT__ 3275 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3276 /*@ 3277 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3278 (the default parallel PETSc format). 3279 3280 Collective on MPI_Comm 3281 3282 Input Parameters: 3283 + B - the matrix 3284 . i - the indices into j for the start of each local row (starts with zero) 3285 . 
j - the column indices for each local row (starts with zero) 3286 - v - optional values in the matrix 3287 3288 Level: developer 3289 3290 Notes: 3291 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3292 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3293 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3294 3295 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3296 3297 The format which is used for the sparse matrix input, is equivalent to a 3298 row-major ordering.. i.e for the following matrix, the input data expected is 3299 as shown: 3300 3301 1 0 0 3302 2 0 3 P0 3303 ------- 3304 4 5 6 P1 3305 3306 Process0 [P0]: rows_owned=[0,1] 3307 i = {0,1,3} [size = nrow+1 = 2+1] 3308 j = {0,0,2} [size = nz = 6] 3309 v = {1,2,3} [size = nz = 6] 3310 3311 Process1 [P1]: rows_owned=[2] 3312 i = {0,3} [size = nrow+1 = 1+1] 3313 j = {0,1,2} [size = nz = 6] 3314 v = {4,5,6} [size = nz = 6] 3315 3316 .keywords: matrix, aij, compressed row, sparse, parallel 3317 3318 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3319 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3320 @*/ 3321 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3322 { 3323 PetscErrorCode ierr; 3324 3325 PetscFunctionBegin; 3326 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 3330 #undef __FUNCT__ 3331 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3332 /*@C 3333 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3334 (the default parallel PETSc format). 
For good matrix assembly performance 3335 the user should preallocate the matrix storage by setting the parameters 3336 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3337 performance can be increased by more than a factor of 50. 3338 3339 Collective on MPI_Comm 3340 3341 Input Parameters: 3342 + B - the matrix 3343 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3344 (same value is used for all local rows) 3345 . d_nnz - array containing the number of nonzeros in the various rows of the 3346 DIAGONAL portion of the local submatrix (possibly different for each row) 3347 or NULL, if d_nz is used to specify the nonzero structure. 3348 The size of this array is equal to the number of local rows, i.e 'm'. 3349 For matrices that will be factored, you must leave room for (and set) 3350 the diagonal entry even if it is zero. 3351 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3352 submatrix (same value is used for all local rows). 3353 - o_nnz - array containing the number of nonzeros in the various rows of the 3354 OFF-DIAGONAL portion of the local submatrix (possibly different for 3355 each row) or NULL, if o_nz is used to specify the nonzero 3356 structure. The size of this array is equal to the number 3357 of local rows, i.e 'm'. 3358 3359 If the *_nnz parameter is given then the *_nz parameter is ignored 3360 3361 The AIJ format (also called the Yale sparse matrix format or 3362 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3363 storage. The stored row and column indices begin with zero. 3364 See Users-Manual: ch_mat for details. 3365 3366 The parallel matrix is partitioned such that the first m0 rows belong to 3367 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3368 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
3369 3370 The DIAGONAL portion of the local submatrix of a processor can be defined 3371 as the submatrix which is obtained by extraction the part corresponding to 3372 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3373 first row that belongs to the processor, r2 is the last row belonging to 3374 the this processor, and c1-c2 is range of indices of the local part of a 3375 vector suitable for applying the matrix to. This is an mxn matrix. In the 3376 common case of a square matrix, the row and column ranges are the same and 3377 the DIAGONAL part is also square. The remaining portion of the local 3378 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3379 3380 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3381 3382 You can call MatGetInfo() to get information on how effective the preallocation was; 3383 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3384 You can also run with the option -info and look for messages with the string 3385 malloc in them to see if additional memory allocation was needed. 3386 3387 Example usage: 3388 3389 Consider the following 8x8 matrix with 34 non-zero values, that is 3390 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3391 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3392 as follows: 3393 3394 .vb 3395 1 2 0 | 0 3 0 | 0 4 3396 Proc0 0 5 6 | 7 0 0 | 8 0 3397 9 0 10 | 11 0 0 | 12 0 3398 ------------------------------------- 3399 13 0 14 | 15 16 17 | 0 0 3400 Proc1 0 18 0 | 19 20 21 | 0 0 3401 0 0 0 | 22 23 0 | 24 0 3402 ------------------------------------- 3403 Proc2 25 26 27 | 0 0 28 | 29 0 3404 30 0 0 | 31 32 33 | 0 34 3405 .ve 3406 3407 This can be represented as a collection of submatrices as: 3408 3409 .vb 3410 A B C 3411 D E F 3412 G H I 3413 .ve 3414 3415 Where the submatrices A,B,C are owned by proc0, D,E,F are 3416 owned by proc1, G,H,I are owned by proc2. 
   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence pre-allocation is perfect.
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Dispatch to the type-specific implementation composed on B as
     "MatMPIAIJSetPreallocation_C"; PetscTryMethod() does nothing (and raises
     no error) for matrix types that do not provide it, e.g. a SEQAIJ matrix
     on a one-process communicator. */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
   The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* CSR input is 0-based, so the first row offset must be 0 */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* copies the CSR arrays into the matrix (see Notes above) */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).
For good matrix assembly performance 3550 the user should preallocate the matrix storage by setting the parameters 3551 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3552 performance can be increased by more than a factor of 50. 3553 3554 Collective on MPI_Comm 3555 3556 Input Parameters: 3557 + comm - MPI communicator 3558 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3559 This value should be the same as the local size used in creating the 3560 y vector for the matrix-vector product y = Ax. 3561 . n - This value should be the same as the local size used in creating the 3562 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3563 calculated if N is given) For square matrices n is almost always m. 3564 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3565 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3566 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3567 (same value is used for all local rows) 3568 . d_nnz - array containing the number of nonzeros in the various rows of the 3569 DIAGONAL portion of the local submatrix (possibly different for each row) 3570 or NULL, if d_nz is used to specify the nonzero structure. 3571 The size of this array is equal to the number of local rows, i.e 'm'. 3572 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3573 submatrix (same value is used for all local rows). 3574 - o_nnz - array containing the number of nonzeros in the various rows of the 3575 OFF-DIAGONAL portion of the local submatrix (possibly different for 3576 each row) or NULL, if o_nz is used to specify the nonzero 3577 structure. The size of this array is equal to the number 3578 of local rows, i.e 'm'. 3579 3580 Output Parameter: 3581 . 
A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
3618 3619 For a square global matrix we define each processor's diagonal portion 3620 to be its local rows and the corresponding columns (a square submatrix); 3621 each processor's off-diagonal portion encompasses the remainder of the 3622 local matrix (a rectangular submatrix). 3623 3624 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3625 3626 When calling this routine with a single process communicator, a matrix of 3627 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3628 type of communicator, use the construction mechanism: 3629 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3630 3631 By default, this format uses inodes (identical nodes) when possible. 3632 We search for consecutive rows with the same nonzero structure, thereby 3633 reusing matrix information to achieve increased efficiency. 3634 3635 Options Database Keys: 3636 + -mat_no_inode - Do not use inodes 3637 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3638 - -mat_aij_oneindex - Internally use indexing starting at 1 3639 rather than 0. Note that when calling MatSetValues(), 3640 the user still MUST index entries starting at 0! 3641 3642 3643 Example usage: 3644 3645 Consider the following 8x8 matrix with 34 non-zero values, that is 3646 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3647 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    /* multi-process communicator: parallel AIJ with diagonal/off-diagonal preallocation */
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* one process: a SEQAIJ matrix is returned (see Notes above); only the
       "diagonal" preallocation parameters are meaningful here */
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/*
   MatMPIAIJGetSeqAIJ - gives access to the constituent SeqAIJ pieces of an
   MPIAIJ matrix: Ad is the diagonal block, Ao the off-diagonal block, and
   colmap the mapping from local column indices of Ao to global column
   indices (garray).  Any output argument may be NULL if not needed.  The
   returned objects are references into A's internal data, not copies.
*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (Ad)     *Ad     = a->A;
  if (Ao)     *Ao     = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__
"MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - attaches a column coloring to both SeqAIJ blocks
   of an MPIAIJ matrix.  The diagonal block can use the given coloring
   directly; for the off-diagonal block a local coloring is constructed whose
   i-th entry is the color of the global column garray[i].
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the colors of ALL global
       columns, then select those of the locally stored off-diagonal columns */
    ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]]; /* garray[i] = global column of local column i of B */
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: translate the owned global column
       numbers to local (ghosted) numbering, then index into coloring->colors */
    ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart; /* global column index of local column i */
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
    /* map the off-diagonal block's global column numbers (garray) to local
       ghosted numbering, then look up their colors */
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/*
   MatSetValuesAdifor_MPIAIJ - forwards ADIFOR-computed values to both the
   diagonal (A) and off-diagonal (B) SeqAIJ blocks of the MPIAIJ matrix.
*/
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - builds a parallel MPIAIJ matrix
   whose local rows are the rows of this process's sequential matrix inmat.
   n is the local column size of the result (or PETSC_DECIDE).  With
   scall == MAT_INITIAL_MATRIX the matrix is created and preallocated from
   inmat's row structure (symbolic phase); otherwise *outmat is reused and
   only the values are inserted (numeric phase).
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns
!= global columns %d",N);

    /* prefix sum of the local row counts gives this process's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase: copy the local rows of inmat into the parallel matrix */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
/*
   MatFileSplit - writes each process's local rows of A, as a sequential
   m x N matrix, to a separate binary file named <outfile>.<rank>.
*/
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
*/
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  /* copy this process's rows of A into the local sequential matrix B */
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  /* NOTE(review): len+5 leaves room for '.', at most 3 rank digits, and the
     terminating NUL; a rank >= 1000 would overflow this buffer -- consider
     allocating len+12 instead */
  ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
/*
   MatDestroy_MPIAIJ_SeqsToMPI - destructor for MPIAIJ matrices created by the
   seqs-to-MPI merge routines: frees the Mat_Merge_SeqsToMPI workspace composed
   on the matrix (if any), then calls the regular MPIAIJ destructor.
*/
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr =
PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    /* drop the composed reference to the just-freed workspace */
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
/*
   MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of merging per-process
   SeqAIJ matrices into a parallel MPIAIJ matrix.  Using the communication
   pattern stored by the symbolic phase (Mat_Merge_SeqsToMPI, composed on
   mpimat as "MatMergeSeqsToMPI"), the values of rows owned by other
   processes are sent to their owners and all contributions are summed
   into mpimat.
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode       ierr;
  MPI_Comm             comm;
  Mat_SeqAIJ           *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt             proc,m;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            *aa=a->a;
  MatScalar            **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the merge context created by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri
= merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* one message per destination: the values of all seqmat rows owned by [proc] */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i; /* global row number of local row i */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi =
ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    aa   = a->a + ai[arow];
    nextaj = 0;
    /* merge scan: advance nextaj whenever the next local column matches;
       assumes the merged row bj_i contains every column of aj (built by the
       symbolic phase), with both index lists sorted -- TODO(review) confirm */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *(nextai[k]);
        aa   = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request
*si_waits,*sj_waits,*ri_waits,*rj_waits; 4093 MPI_Status *status; 4094 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4095 PetscBT lnkbt; 4096 Mat_Merge_SeqsToMPI *merge; 4097 PetscContainer container; 4098 4099 PetscFunctionBegin; 4100 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4101 4102 /* make sure it is a PETSc comm */ 4103 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4104 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4105 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4106 4107 ierr = PetscNew(&merge);CHKERRQ(ierr); 4108 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4109 4110 /* determine row ownership */ 4111 /*---------------------------------------------------------*/ 4112 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4113 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4114 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4115 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4116 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4117 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4118 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4119 4120 m = merge->rowmap->n; 4121 owners = merge->rowmap->range; 4122 4123 /* determine the number of messages to send, their lengths */ 4124 /*---------------------------------------------------------*/ 4125 len_s = merge->len_s; 4126 4127 len = 0; /* length of buf_si[] */ 4128 merge->nsend = 0; 4129 for (proc=0; proc<size; proc++) { 4130 len_si[proc] = 0; 4131 if (proc == rank) { 4132 len_s[proc] = 0; 4133 } else { 4134 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4135 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4136 } 4137 if (len_s[proc]) { 4138 merge->nsend++; 4139 nrows = 0; 4140 for (i=owners[proc]; i<owners[proc+1]; i++) { 4141 if (ai[i+1] > ai[i]) nrows++; 4142 } 4143 len_si[proc] = 2*(nrows+1); 4144 len += len_si[proc]; 4145 } 4146 } 4147 4148 
/* determine the number and length of messages to receive for ij-structure */ 4149 /*-------------------------------------------------------------------------*/ 4150 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4151 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4152 4153 /* post the Irecv of j-structure */ 4154 /*-------------------------------*/ 4155 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4156 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4157 4158 /* post the Isend of j-structure */ 4159 /*--------------------------------*/ 4160 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4161 4162 for (proc=0, k=0; proc<size; proc++) { 4163 if (!len_s[proc]) continue; 4164 i = owners[proc]; 4165 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4166 k++; 4167 } 4168 4169 /* receives and sends of j-structure are complete */ 4170 /*------------------------------------------------*/ 4171 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4172 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4173 4174 /* send and recv i-structure */ 4175 /*---------------------------*/ 4176 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4177 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4178 4179 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4180 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4181 for (proc=0,k=0; proc<size; proc++) { 4182 if (!len_s[proc]) continue; 4183 /* form outgoing message for i-structure: 4184 buf_si[0]: nrows to be sent 4185 [1:nrows]: row index (global) 4186 [nrows+1:2*nrows+1]: i-structure index 4187 */ 4188 
/*-------------------------------------------*/ 4189 nrows = len_si[proc]/2 - 1; 4190 buf_si_i = buf_si + nrows+1; 4191 buf_si[0] = nrows; 4192 buf_si_i[0] = 0; 4193 nrows = 0; 4194 for (i=owners[proc]; i<owners[proc+1]; i++) { 4195 anzi = ai[i+1] - ai[i]; 4196 if (anzi) { 4197 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4198 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4199 nrows++; 4200 } 4201 } 4202 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4203 k++; 4204 buf_si += len_si[proc]; 4205 } 4206 4207 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4208 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4209 4210 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4211 for (i=0; i<merge->nrecv; i++) { 4212 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4213 } 4214 4215 ierr = PetscFree(len_si);CHKERRQ(ierr); 4216 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4217 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4218 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4219 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4220 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4221 ierr = PetscFree(status);CHKERRQ(ierr); 4222 4223 /* compute a local seq matrix in each processor */ 4224 /*----------------------------------------------*/ 4225 /* allocate bi array and free space for accumulating nonzero column info */ 4226 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4227 bi[0] = 0; 4228 4229 /* create and initialize a linked list */ 4230 nlnk = N+1; 4231 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4232 4233 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4234 len = ai[owners[rank+1]] - ai[owners[rank]]; 4235 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4236 4237 current_space = free_space; 4238 
4239 /* determine symbolic info for each local row */ 4240 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4241 4242 for (k=0; k<merge->nrecv; k++) { 4243 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4244 nrows = *buf_ri_k[k]; 4245 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4246 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4247 } 4248 4249 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4250 len = 0; 4251 for (i=0; i<m; i++) { 4252 bnzi = 0; 4253 /* add local non-zero cols of this proc's seqmat into lnk */ 4254 arow = owners[rank] + i; 4255 anzi = ai[arow+1] - ai[arow]; 4256 aj = a->j + ai[arow]; 4257 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4258 bnzi += nlnk; 4259 /* add received col data into lnk */ 4260 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4261 if (i == *nextrow[k]) { /* i-th row */ 4262 anzi = *(nextai[k]+1) - *nextai[k]; 4263 aj = buf_rj[k] + *nextai[k]; 4264 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4265 bnzi += nlnk; 4266 nextrow[k]++; nextai[k]++; 4267 } 4268 } 4269 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4270 4271 /* if free space is not available, make more free space */ 4272 if (current_space->local_remaining<bnzi) { 4273 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4274 nspacedouble++; 4275 } 4276 /* copy data into free space, then initialize lnk */ 4277 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4278 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4279 4280 current_space->array += bnzi; 4281 current_space->local_used += bnzi; 4282 current_space->local_remaining -= bnzi; 4283 4284 bi[i+1] = bi[i] + bnzi; 4285 } 4286 4287 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 
4288 4289 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4290 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4291 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4292 4293 /* create symbolic parallel matrix B_mpi */ 4294 /*---------------------------------------*/ 4295 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4296 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4297 if (n==PETSC_DECIDE) { 4298 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4299 } else { 4300 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4301 } 4302 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4303 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4304 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4305 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4306 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4307 4308 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4309 B_mpi->assembled = PETSC_FALSE; 4310 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4311 merge->bi = bi; 4312 merge->bj = bj; 4313 merge->buf_ri = buf_ri; 4314 merge->buf_rj = buf_rj; 4315 merge->coi = NULL; 4316 merge->coj = NULL; 4317 merge->owners_co = NULL; 4318 4319 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4320 4321 /* attach the supporting struct to B_mpi for reuse */ 4322 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4323 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4324 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4325 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4326 *mpimat = B_mpi; 4327 4328 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4329 PetscFunctionReturn(0); 4330 } 4331 4332 #undef __FUNCT__ 4333 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4334 /*@C 4335 MatCreateMPIAIJSumSeqAIJ - 
Creates a MPIAIJ matrix by adding sequential
   matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    /* Uniprocessor case: no merging needed, the "sum" is just a copy of seqmat */
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      /* MAT_REUSE_MATRIX: overwrite values of the previously created *mpimat in place */
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  /* Parallel case: symbolic phase only on the first call; numeric phase every call */
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by
taking all its local rows and putting them into a sequential vector with
   mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
   with MatGetSize()

   Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;  /* cmap: local col of B -> global col */
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* On one process with REUSE there is nothing to update: *A_loc shares A's arrays */
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  /* a: diagonal block (local columns), b: off-diagonal block (compressed remote columns) */
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      /* single process: the diagonal block IS the whole matrix; wrap its arrays without copying */
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    /* row i of the merged matrix has nnz(A-diag row i) + nnz(A-offdiag row i) entries */
    ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
    k    = 0;
    /* Merge each row keeping global column order: off-diag cols < cstart,
       then diagonal cols [cstart,cend), then off-diag cols >= cend.
       NOTE: aj/aa/bj/ba advance statefully across iterations (CSR walk). */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A (columns left of the diagonal block) */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (columns right of the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Same interleaving as above but only the values array of *A_loc is refreshed;
       the i/j structure is assumed unchanged since the MAT_INITIAL_MATRIX call */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
    MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

   Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  /* Default row set: all locally owned rows */
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  /* Default col set: all columns with at least one local nonzero, in global order:
     off-diagonal columns left of the diagonal block, the diagonal block, then the rest */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;      /* sorted global column indices of the off-diagonal block */
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    /* idx ownership passes to the IS (PETSC_OWN_POINTER) */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  /* MatGetSubMatrices expects an array of Mat; seed it with *A_loc when reusing */
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* A*B requires the column layout of A to match the row layout of B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the list of B-rows needed: exactly the nonzero columns of local A,
       assembled in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* Return the index sets to the caller when requested, otherwise clean them up */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.
 bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx =a->Mvctx;  /* reuses A's MatMult scatter to know which remote rows are needed */
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);  /* no off-diagonal part exists on one process */

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* NOTE(review): digs into VecScatter_MPI_General internals; the scatter's value
     buffers are repurposed here to carry row lengths during the i-structure exchange */
  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;    /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  /* Without the caller-provided save slots a REUSE call cannot work; fall back to INITIAL */
  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len        += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4811 } else if (scall == MAT_REUSE_MATRIX) { 4812 sstartsj = *startsj_s; 4813 rstartsj = *startsj_r; 4814 bufa = *bufa_ptr; 4815 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4816 b_otha = b_oth->a; 4817 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4818 4819 /* a-array */ 4820 /*---------*/ 4821 /* post receives of a-array */ 4822 for (i=0; i<nrecvs; i++) { 4823 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4824 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4825 } 4826 4827 /* pack the outgoing message a-array */ 4828 k = 0; 4829 for (i=0; i<nsends; i++) { 4830 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4831 bufA = bufa+sstartsj[i]; 4832 for (j=0; j<nrows; j++) { 4833 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4834 for (ll=0; ll<sbs; ll++) { 4835 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4836 for (l=0; l<ncols; l++) { 4837 *bufA++ = vals[l]; 4838 } 4839 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4840 } 4841 } 4842 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4843 } 4844 /* recvs and sends of a-array are completed */ 4845 i = nrecvs; 4846 while (i--) { 4847 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4848 } 4849 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4850 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4851 4852 if (scall == MAT_INITIAL_MATRIX) { 4853 /* put together the new matrix */ 4854 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4855 4856 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4857 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4858 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4859 b_oth->free_a = PETSC_TRUE; 4860 b_oth->free_ij = PETSC_TRUE; 4861 b_oth->nonew = 0; 4862 4863 ierr = PetscFree(bufj);CHKERRQ(ierr); 4864 if (!startsj_s || !bufa_ptr) { 4865 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4866 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4867 } else { 4868 *startsj_s = sstartsj; 4869 *startsj_r = rstartsj; 4870 *bufa_ptr = bufa; 4871 } 4872 } 4873 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4874 PetscFunctionReturn(0); 4875 } 4876 4877 #undef __FUNCT__ 4878 #define __FUNCT__ "MatGetCommunicationStructs" 4879 /*@C 4880 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4881 4882 Not Collective 4883 4884 Input Parameters: 4885 . A - The matrix in mpiaij format 4886 4887 Output Parameter: 4888 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4889 . colmap - A map from global column index to local index into lvec 4890 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4891 4892 Level: developer 4893 4894 @*/ 4895 #if defined(PETSC_USE_CTABLE) 4896 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4897 #else 4898 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4899 #endif 4900 { 4901 Mat_MPIAIJ *a; 4902 4903 PetscFunctionBegin; 4904 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4905 PetscValidPointer(lvec, 2); 4906 PetscValidPointer(colmap, 3); 4907 PetscValidPointer(multScatter, 4); 4908 a = (Mat_MPIAIJ*) A->data; 4909 if (lvec) *lvec = a->lvec; 4910 if (colmap) *colmap = a->colmap; 4911 if (multScatter) *multScatter = a->Mvctx; 4912 PetscFunctionReturn(0); 4913 } 4914 4915 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4916 PETSC_EXTERN PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as (B'*A')' since computing a dense*sparse product
    directly is untenable:

         n           p            p
      (     )     (     )      (     )
    m (  A  ) * n (  B  ) =  m (  C  )
      (     )     (     )      (     )

    Both operands are transposed, multiplied sparse-first, and the result is
    transposed back into the caller-provided C (MAT_REUSE_MATRIX).
*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  /* write the transposed product into the existing C allocated by the symbolic phase */
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
/* Symbolic phase: allocates the dense product matrix C with the correct layout
   and registers the numeric kernel; no numerical work is done here */
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/* Driver: runs the symbolic phase on MAT_INITIAL_MATRIX, then always the numeric phase */
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
 -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/* Type constructor for MATMPIAIJ: installs the ops table, initializes the
   Mat_MPIAIJ private data, and composes the type-specific methods */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* compose type-specific methods queried by name elsewhere in PETSc */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* conversions to related formats */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  /* mixed dense*sparse products */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
     must free the arrays once the matrix has been destroyed and not before.

     The i and j indices are 0 based

     See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

     This sets local rows and cannot be used to set off-processor values.

     Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
     legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
     not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
     the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
     keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
     communication if it is known that only local entries will be set.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* Mark preallocated so the wrapping of the user arrays below is not
     clobbered by a later preallocation pass */
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Wrap (do not copy) the caller's CSR arrays as the diagonal (A) and
     off-diagonal (B) sequential blocks; B is created with the full global
     column width */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* The user owns the arrays, so introducing new nonzero locations later is an error */
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so can be used in void function */
/* The Fortran binding below returns void, so the error macros are redefined
   to abort (CHKERRABORT) instead of returning an error code */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
/*
   matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on an
   MPIAIJ matrix: the insertion logic is inlined here via the
   MatSetValues_SeqAIJ_A_Private / MatSetValues_SeqAIJ_B_Private macros rather
   than going through the C MatSetValues() dispatch.

   NOTE(review): the _ierr output argument is never written; all failures
   abort through the CHKERRABORT-based macros above — confirm this matches
   the Fortran interface's expectation.
*/
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    /* zero entries are skipped only when the diagonal block requests it AND we are adding */
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    /* scratch state consumed/updated by the MatSetValues_SeqAIJ_{A,B}_Private macros */
    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;           /* negative row index: silently skipped */
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        /* row is locally owned: set up the per-row cursors for both blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;  /* negative column index: silently skipped */
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            /* column belongs to the off-diagonal block */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;  /* colmap stores global col + 1; 0 means absent */
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* column not present in the compressed B: fall back to the
                   uncompressed (disassembled) representation */
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                /* NOTE(review): ap2 is computed from the pre-disassembly ba;
                   ba is refreshed to b->a only below, so ap2 may point into
                   the old value array — verify against MatSetValues_MPIAIJ */
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];            /* not yet assembled: B uses global column indices */
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication at assembly time */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}