#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
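/*
   A minimal usage sketch for the matrix types documented above (illustrative
   only, not part of this file's implementation; the local sizes and fill
   estimates below are hypothetical). Calling both preallocation routines, as
   the documentation recommends, keeps the code correct on any communicator:

       Mat            A;
       PetscInt       m = 10,n = 10;
       PetscErrorCode ierr;

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          (used on one process)
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   (used on several)
       ... MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
       ierr = MatDestroy(&A);CHKERRQ(ierr);
*/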
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
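/*
   Illustrative sketch (not part of this file): MatGetColumnNorms_MPIAIJ() above
   backs the public MatGetColumnNorms() interface. Because the result is
   combined with MPI_Allreduce(), the norms array must have one entry per
   *global* column on every process:

       PetscReal *norms;
       PetscInt  N;

       ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
       ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
       ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
       ierr = PetscFree(norms);CHKERRQ(ierr);
*/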
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Adapted from
    MatLoad_MPIAIJ(); the logic is duplicated rather than shared and should
    eventually become a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
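/*
   Sketch of how MatDistribute_MPIAIJ() above might be driven (an assumption
   for illustration; within PETSc it is called by a preconditioner rather than
   by users). A square MATSEQAIJ matrix gmat assembled on rank 0 is spread over
   the communicator with m local rows per process; a second call with
   MAT_REUSE_MATRIX moves only new numerical values into the same pattern:

       Mat gmat = NULL,dmat;

       if (!rank) { ... assemble the full matrix into gmat as MATSEQAIJ ... }
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
       ... change values in gmat on rank 0, keeping its nonzero pattern ...
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/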
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  process has an order-N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else                high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; \
    else                high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
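/*
   The two MatSetValues_SeqAIJ_*_Private() macros above share one search
   scheme: bisect the sorted column indices of a row down to a small window,
   then finish with a linear scan. A self-contained sketch of that scheme
   (illustrative only; FindInsertPos is a hypothetical helper, not used by
   this file):

       static PetscInt FindInsertPos(const PetscInt *cols,PetscInt n,PetscInt col)
       {
         PetscInt low = 0,high = n,t,i;
         while (high-low > 5) {        (binary search down to a window of <= 5)
           t = (low+high)/2;
           if (cols[t] > col) high = t;
           else               low  = t;
         }
         for (i=low; i<high; i++) {    (short linear scan inside the window)
           if (cols[i] >= col) break;  (i is the match or the insertion point)
         }
         return i;
       }
*/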
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
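/*
   Caller-side contract implemented by MatSetValues_MPIAIJ() above
   (illustrative): locally owned entries go straight into the A/B blocks,
   anything else is stashed and only communicated at assembly time, so the
   assembly calls are mandatory before the matrix is used:

       PetscInt    row,col;            (may be owned by another process)
       PetscScalar v = 1.0;

       ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/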
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of the submatrix B cannot change, then no process
     can have disassembled, so this step can be skipped
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, which
     would allocate new memory and slow things down with many mallocs.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
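/*
   Typical use of the MatZeroRows() implementation above (illustrative): zero
   the rows of Dirichlet boundary nodes, put diag on the diagonal, and pass the
   optional x/b pair so the right-hand side stays consistent with the known
   solution values (the row indices below are hypothetical):

       PetscInt rows[] = {0,5,9};

       ierr = MatZeroRows(A,3,rows,1.0,x,b);CHKERRQ(ierr);
*/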
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
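/*
   The multiply routines above overlap communication (VecScatterBegin()/End()
   on the off-diagonal part) with the local product. A caller-side sketch with
   compatibly laid-out vectors (illustrative):

       Vec x,y;

       ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);    (x matches columns, y matches rows)
       ierr = VecSet(x,1.0);CHKERRQ(ierr);
       ierr = MatMult(A,x,y);CHKERRQ(ierr);         (y = A*x)
       ierr = VecDestroy(&x);CHKERRQ(ierr);
       ierr = VecDestroy(&y);CHKERRQ(ierr);
*/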
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
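/*
   Sketch of the interface MatIsTranspose_MPIAIJ() above backs (illustrative;
   the tolerance is arbitrary):

       PetscBool flg;

       ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);   (flg: is B == A^T within tol?)
*/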
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs enough space for what the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  PetscFunctionReturn(0);
}
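/*
   The binary viewer above writes, in order: a four-entry header
   {MAT_FILE_CLASSID, M, N, total nonzeros}, then all row lengths, then all
   global column indices, then all numerical values. A sketch of reading the
   header back with PETSc's binary I/O (illustrative; obtaining fd from a
   binary viewer is assumed):

       PetscInt header[4];

       ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
       (header[0] == MAT_FILE_CLASSID, header[1] == rows, header[2] == cols, header[3] == nonzeros)
*/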
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. 
*/ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to participate in viewing the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */ 1575 ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr =
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 
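/* Eisenstat's trick: the backward sweep above produced a provisional xx; below, xx is scattered to the ghost vector, bb1 = bb + ((omega-2)/omega)*D*xx + B*x_ghost is assembled, and a concluding forward sweep into xx1 (added to xx) completes the application at roughly the cost of a single sweep */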
1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 
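/* accumulate the five counters (nz_used, nz_allocated, nz_unneeded, memory, mallocs) from the diagonal block A and the off-diagonal block B into isend[]; for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM they are then reduced across the communicator below */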
info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = 
amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation.
*/ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map.hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr); 2450 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2452 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); /* release the array access acquired with VecGetArray() above */ ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatDestroy_MatRedundant" 2512 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2513 { 2514 PetscErrorCode ierr; 2515 Mat_Redundant *redund; 2516 PetscInt i; 2517 PetscMPIInt size; 2518 2519 PetscFunctionBegin; 2520 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2521 if (size == 1) { 2522 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2523 redund =
a->redundant; 2524 } else { 2525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2526 redund = a->redundant; 2527 } 2528 if (redund){ 2529 if (redund->matseq) { /* via MatGetSubMatrices() */ 2530 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2531 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2532 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2533 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2534 } else { 2535 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2536 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2537 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2538 for (i=0; i<redund->nrecvs; i++) { 2539 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2540 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2541 } 2542 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2543 } 2544 2545 if (redund->psubcomm) { 2546 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2547 } 2548 ierr = redund->Destroy(A);CHKERRQ(ierr); 2549 ierr = PetscFree(redund);CHKERRQ(ierr); 2550 } 2551 PetscFunctionReturn(0); 2552 } 2553 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2556 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2557 { 2558 PetscMPIInt rank,size; 2559 MPI_Comm comm; 2560 PetscErrorCode ierr; 2561 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2562 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2563 PetscInt *rowrange = mat->rmap->range; 2564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2565 Mat A = aij->A,B=aij->B,C=*matredundant; 2566 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2567 PetscScalar *sbuf_a; 2568 PetscInt nzlocal=a->nz+b->nz; 2569 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2570 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2571 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2572 MatScalar *aworkA,*aworkB; 2573 PetscScalar *vals; 2574 PetscMPIInt tag1,tag2,tag3,imdex; 2575 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2576 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2577 MPI_Status recv_status,*send_status; 2578 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2579 PetscInt **rbuf_j=NULL; 2580 PetscScalar **rbuf_a=NULL; 2581 Mat_Redundant *redund =NULL; 2582 2583 PetscFunctionBegin; 2584 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2585 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2586 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2587 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2588 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2589 2590 if (reuse == MAT_REUSE_MATRIX) { 2591 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2592 if (subsize == 1) { 2593 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2594 redund = c->redundant; 2595 } else { 2596 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2597 redund = c->redundant; 2598 } 2599 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2600 2601 nsends = redund->nsends; 2602 nrecvs = redund->nrecvs; 2603 send_rank = redund->send_rank; 2604 recv_rank = redund->recv_rank; 2605 sbuf_nz = redund->sbuf_nz; 2606 rbuf_nz = redund->rbuf_nz; 2607 sbuf_j = redund->sbuf_j; 2608 sbuf_a = redund->sbuf_a; 2609 rbuf_j = redund->rbuf_j; 2610 rbuf_a = redund->rbuf_a; 2611 } 2612 2613 if (reuse == MAT_INITIAL_MATRIX) { 2614 PetscInt nleftover,np_subcomm; 2615 2616 /* get the destination processors' id send_rank, nsends and nrecvs */ 2617 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2618 2619 np_subcomm = size/nsubcomm; 2620 nleftover = size - nsubcomm*np_subcomm; 2621 2622 /* block of codes below is specific for INTERLACED */ 2623 /* ------------------------------------------------*/ 2624 nsends = 0; nrecvs = 0; 2625 for (i=0; i<size; i++) { 2626 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2627 send_rank[nsends++] = i; 2628 recv_rank[nrecvs++] = i; 2629 } 2630 } 2631 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2632 i = size-nleftover-1; 2633 j = 0; 2634 while (j < nsubcomm - nleftover) { 2635 send_rank[nsends++] = i; 2636 i--; j++; 2637 } 2638 } 2639 2640 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2641 for (i=0; i<nleftover; i++) { 2642 recv_rank[nrecvs++] = size-nleftover+i; 2643 } 2644 } 2645 /*----------------------------------------------*/ 2646 2647 /* allocate sbuf_j, sbuf_a */ 2648 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2649 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2650 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2651 /* 2652 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2653 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 2654 */ 2655 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2656 2657 /* copy mat's local entries into the buffers */ 2658 if (reuse == MAT_INITIAL_MATRIX) { 2659 rownz_max = 0; 2660 rptr = sbuf_j; 2661 cols = sbuf_j + rend-rstart + 1; 2662 vals = sbuf_a; 2663 rptr[0] = 0; 2664 for (i=0; i<rend-rstart; i++) { 2665 row = i + rstart; 2666 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2667 ncols = nzA + nzB; 2668 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2669 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2670 /* load the column indices for this row into cols */ 2671 lwrite = 0; 2672 for (l=0; l<nzB; l++) { 2673 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2674 vals[lwrite] = aworkB[l]; 2675 cols[lwrite++] = ctmp; 2676 } 2677 } 2678 for (l=0; l<nzA; l++) { 2679 vals[lwrite] = aworkA[l]; 2680 cols[lwrite++] = cstart + cworkA[l]; 2681 } 2682 for (l=0; l<nzB; l++) { 2683 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2684 vals[lwrite] = aworkB[l]; 2685 cols[lwrite++] = ctmp; 2686 } 2687 } 2688 vals += ncols; 2689 cols += ncols; 2690 rptr[i+1] = rptr[i] + ncols; 2691 if (rownz_max < ncols) rownz_max = ncols; 2692 } 2693 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2694 } else { /* only copy matrix values into sbuf_a */ 2695 rptr = sbuf_j; 2696 vals = sbuf_a; 2697 rptr[0] = 0; 2698 for (i=0; i<rend-rstart; i++) { 2699 row = i + rstart; 2700 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2701 ncols = nzA + nzB; 2702 cworkB = b->j + b->i[i]; 2703 aworkA = a->a + a->i[i]; 2704 aworkB = 
b->a + b->i[i]; 2705 lwrite = 0; 2706 for (l=0; l<nzB; l++) { 2707 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2708 } 2709 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2710 for (l=0; l<nzB; l++) { 2711 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2712 } 2713 vals += ncols; 2714 rptr[i+1] = rptr[i] + ncols; 2715 } 2716 } /* end of if/else (reuse == MAT_INITIAL_MATRIX) */ 2717 2718 /* send nzlocal to others, and receive others' nzlocal */ 2719 /*--------------------------------------------------*/ 2720 if (reuse == MAT_INITIAL_MATRIX) { 2721 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2722 2723 s_waits2 = s_waits3 + nsends; 2724 s_waits1 = s_waits2 + nsends; 2725 r_waits1 = s_waits1 + nsends; 2726 r_waits2 = r_waits1 + nrecvs; 2727 r_waits3 = r_waits2 + nrecvs; 2728 } else { 2729 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2730 2731 r_waits3 = s_waits3 + nsends; 2732 } 2733 2734 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2735 if (reuse == MAT_INITIAL_MATRIX) { 2736 /* get new tags to keep the communication clean */ 2737 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2738 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2739 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2740 2741 /* post receives of others' nzlocal */ 2742 for (i=0; i<nrecvs; i++) { 2743 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2744 } 2745 /* send nzlocal to others */ 2746 for (i=0; i<nsends; i++) { 2747 sbuf_nz[i] = nzlocal; 2748 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2749 } 2750 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2751 count = nrecvs; 2752 while (count) { 2753 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2754 2755 recv_rank[imdex] = recv_status.MPI_SOURCE; 2756 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2757 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2758 2759 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2760 2761 rbuf_nz[imdex] += i + 2; 2762 2763 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2764 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2765 count--; 2766 } 2767 /* wait on sends of nzlocal */ 2768 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2769 /* send mat->i,j to others, and receive theirs */ 2770 /*------------------------------------------------*/ 2771 for (i=0; i<nsends; i++) { 2772 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2773 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2774 } 2775 /* wait on receives of mat->i,j */ 2776 /*------------------------------*/ 2777 count = nrecvs; 2778 while (count) { 2779 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2780 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2781 count--; 2782 } 2783 /* wait on sends of mat->i,j */ 2784
/*---------------------------*/ 2785 if (nsends) { 2786 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2787 } 2788 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2789 2790 /* post receives, send and receive mat->a */ 2791 /*----------------------------------------*/ 2792 for (imdex=0; imdex<nrecvs; imdex++) { 2793 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2794 } 2795 for (i=0; i<nsends; i++) { 2796 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2797 } 2798 count = nrecvs; 2799 while (count) { 2800 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2801 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2802 count--; 2803 } 2804 if (nsends) { 2805 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2806 } 2807 2808 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2809 2810 /* create redundant matrix */ 2811 /*-------------------------*/ 2812 if (reuse == MAT_INITIAL_MATRIX) { 2813 const PetscInt *range; 2814 PetscInt rstart_sub,rend_sub,mloc_sub; 2815 2816 /* compute rownz_max for preallocation */ 2817 for (imdex=0; imdex<nrecvs; imdex++) { 2818 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2819 rptr = rbuf_j[imdex]; 2820 for (i=0; i<j; i++) { 2821 ncols = rptr[i+1] - rptr[i]; 2822 if (rownz_max < ncols) rownz_max = ncols; 2823 } 2824 } 2825 2826 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2827 2828 /* get local size of redundant matrix 2829 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not be efficient! */ 2830 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2831 rstart_sub = range[nsubcomm*subrank]; 2832 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2833 rend_sub = range[nsubcomm*(subrank+1)]; 2834 } else { 2835 rend_sub = mat->rmap->N; 2836 } 2837 mloc_sub = rend_sub - rstart_sub; 2838 2839 if (M == N) { 2840 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2841 } else { /* non-square matrix */ 2842 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2843 } 2844 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2845 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2846 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2847 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2848 } else { 2849 C = *matredundant; 2850 } 2851 2852 /* insert local matrix entries */ 2853 rptr = sbuf_j; 2854 cols = sbuf_j + rend-rstart + 1; 2855 vals = sbuf_a; 2856 for (i=0; i<rend-rstart; i++) { 2857 row = i + rstart; 2858 ncols = rptr[i+1] - rptr[i]; 2859 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2860 vals += ncols; 2861 cols += ncols; 2862 } 2863 /* insert received matrix entries */ 2864 for (imdex=0; imdex<nrecvs; imdex++) { 2865 rstart = rowrange[recv_rank[imdex]]; 2866 rend = rowrange[recv_rank[imdex]+1]; 2867 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2868 rptr = rbuf_j[imdex]; 2869 cols = rbuf_j[imdex] + rend-rstart + 1; 2870 vals = rbuf_a[imdex]; 2871 for (i=0; i<rend-rstart; i++) { 2872 row = i + rstart; 2873 ncols = rptr[i+1] - rptr[i]; 2874 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2875 vals += ncols; 2876 cols += ncols; 2877 } 2878 } 2879 ierr =
MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2881 2882 if (reuse == MAT_INITIAL_MATRIX) { 2883 *matredundant = C; 2884 2885 /* create a supporting struct and attach it to C for reuse */ 2886 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2887 if (subsize == 1) { 2888 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2889 c->redundant = redund; 2890 } else { 2891 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2892 c->redundant = redund; 2893 } 2894 2895 redund->nzlocal = nzlocal; 2896 redund->nsends = nsends; 2897 redund->nrecvs = nrecvs; 2898 redund->send_rank = send_rank; 2899 redund->recv_rank = recv_rank; 2900 redund->sbuf_nz = sbuf_nz; 2901 redund->rbuf_nz = rbuf_nz; 2902 redund->sbuf_j = sbuf_j; 2903 redund->sbuf_a = sbuf_a; 2904 redund->rbuf_j = rbuf_j; 2905 redund->rbuf_a = rbuf_a; 2906 redund->psubcomm = NULL; 2907 2908 redund->Destroy = C->ops->destroy; 2909 C->ops->destroy = MatDestroy_MatRedundant; 2910 } 2911 PetscFunctionReturn(0); 2912 } 2913 2914 #undef __FUNCT__ 2915 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2916 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2917 { 2918 PetscErrorCode ierr; 2919 MPI_Comm comm; 2920 PetscMPIInt size,subsize; 2921 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2922 Mat_Redundant *redund=NULL; 2923 PetscSubcomm psubcomm=NULL; 2924 MPI_Comm subcomm_in=subcomm; 2925 Mat *matseq; 2926 IS isrow,iscol; 2927 2928 PetscFunctionBegin; 2929 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2930 if (reuse == MAT_INITIAL_MATRIX) { 2931 /* create psubcomm, then get subcomm */ 2932 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2933 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2934 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2935 2936 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2937 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2938 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2939 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2940 subcomm = psubcomm->comm; 2941 } else { /* retrieve psubcomm and subcomm */ 2942 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2943 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2944 if (subsize == 1) { 2945 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2946 redund = c->redundant; 2947 } else { 2948 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2949 redund = c->redundant; 2950 } 2951 psubcomm = redund->psubcomm; 2952 } 2953 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2954 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2955 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2956 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2957 if (subsize == 1) { 2958 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2959 c->redundant->psubcomm = psubcomm; 2960 } else { 2961 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2962 c->redundant->psubcomm = psubcomm ; 2963 } 2964 } 2965 PetscFunctionReturn(0); 2966 } 2967 } 2968 2969 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2970 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2971 
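  /* Non-interlaced path: each process of subcomm gathers its share of the global matrix
     as a sequential submatrix via MatGetSubMatrices() below, and the pieces are then
     concatenated into the redundant parallel matrix by MatCreateMPIAIJConcatenateSeqAIJ(). */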
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a local sequential matrix matseq[0] */
    mloc_sub = PETSC_DECIDE;
    ierr     = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
    ierr     = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
    rstart   = rend - mloc_sub;
    ierr     = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
    ierr     = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
  } else { /* reuse == MAT_REUSE_MATRIX */
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      redund = c->redundant;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      redund = c->redundant;
    }

    isrow  = redund->isrow;
    iscol  = redund->iscol;
    matseq = redund->matseq;
  }
  ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr);
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      c->redundant = redund;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      c->redundant = redund;
    }
    redund->isrow    = isrow;
    redund->iscol    = iscol;
    redund->matseq   = matseq;
    redund->psubcomm = psubcomm;
    redund->Destroy  = (*matredundant)->ops->destroy;

    (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* fixed: loop over the local rows (was A->cmap->n); v has A->rmap->n entries */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  /* fixed: the work vectors are sequential, so they must be created on PETSC_COMM_SELF
     (as in MatGetRowMax_MPIAIJ()), not on the parallel communicator of A */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr =
VecRestoreArray(v, &a);CHKERRQ(ierr); 3169 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3170 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3171 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3172 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3173 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3174 PetscFunctionReturn(0); 3175 } 3176 3177 #undef __FUNCT__ 3178 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3179 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3180 { 3181 PetscErrorCode ierr; 3182 Mat *dummy; 3183 3184 PetscFunctionBegin; 3185 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3186 *newmat = *dummy; 3187 ierr = PetscFree(dummy);CHKERRQ(ierr); 3188 PetscFunctionReturn(0); 3189 } 3190 3191 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3192 3193 #undef __FUNCT__ 3194 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3195 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3196 { 3197 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3198 PetscErrorCode ierr; 3199 3200 PetscFunctionBegin; 3201 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3202 PetscFunctionReturn(0); 3203 } 3204 3205 #undef __FUNCT__ 3206 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3207 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3208 { 3209 PetscErrorCode ierr; 3210 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3211 3212 PetscFunctionBegin; 3213 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3214 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3215 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3216 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3217 PetscFunctionReturn(0); 3218 } 3219 3220 /* -------------------------------------------------------------------*/ 3221 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3222 MatGetRow_MPIAIJ, 3223 MatRestoreRow_MPIAIJ, 3224 MatMult_MPIAIJ, 3225 /* 4*/ MatMultAdd_MPIAIJ, 3226 MatMultTranspose_MPIAIJ, 3227 MatMultTransposeAdd_MPIAIJ, 3228 #if defined(PETSC_HAVE_PBGL) 3229 MatSolve_MPIAIJ, 3230 #else 3231 0, 3232 #endif 3233 0, 3234 0, 3235 /*10*/ 0, 3236 0, 3237 0, 3238 MatSOR_MPIAIJ, 3239 MatTranspose_MPIAIJ, 3240 /*15*/ MatGetInfo_MPIAIJ, 3241 MatEqual_MPIAIJ, 3242 MatGetDiagonal_MPIAIJ, 3243 MatDiagonalScale_MPIAIJ, 3244 MatNorm_MPIAIJ, 3245 /*20*/ MatAssemblyBegin_MPIAIJ, 3246 MatAssemblyEnd_MPIAIJ, 3247 MatSetOption_MPIAIJ, 3248 MatZeroEntries_MPIAIJ, 3249 /*24*/ MatZeroRows_MPIAIJ, 3250 0, 3251 #if defined(PETSC_HAVE_PBGL) 3252 0, 3253 #else 3254 0, 3255 #endif 3256 0, 3257 0, 3258 /*29*/ MatSetUp_MPIAIJ, 3259 #if defined(PETSC_HAVE_PBGL) 3260 0, 3261 #else 3262 0, 3263 #endif 3264 0, 3265 0, 3266 0, 3267 /*34*/ MatDuplicate_MPIAIJ, 3268 0, 3269 0, 3270 0, 3271 0, 3272 /*39*/ MatAXPY_MPIAIJ, 3273 MatGetSubMatrices_MPIAIJ, 3274 MatIncreaseOverlap_MPIAIJ, 3275 MatGetValues_MPIAIJ, 3276 MatCopy_MPIAIJ, 3277 /*44*/ MatGetRowMax_MPIAIJ, 3278 MatScale_MPIAIJ, 3279 0, 3280 0, 3281 MatZeroRowsColumns_MPIAIJ, 3282 /*49*/ MatSetRandom_MPIAIJ, 3283 0, 3284 0, 3285 0, 3286 0, 3287 /*54*/ MatFDColoringCreate_MPIAIJ, 3288 0, 3289 MatSetUnfactored_MPIAIJ, 3290 MatPermute_MPIAIJ, 3291 0, 3292 /*59*/ MatGetSubMatrix_MPIAIJ, 3293 MatDestroy_MPIAIJ, 3294 MatView_MPIAIJ, 3295 0, 3296 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3297 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3298 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3299 0, 3300 0, 3301 0, 3302 /*69*/ 
MatGetRowMaxAbs_MPIAIJ, 3303 MatGetRowMinAbs_MPIAIJ, 3304 0, 3305 MatSetColoring_MPIAIJ, 3306 0, 3307 MatSetValuesAdifor_MPIAIJ, 3308 /*75*/ MatFDColoringApply_AIJ, 3309 0, 3310 0, 3311 0, 3312 MatFindZeroDiagonals_MPIAIJ, 3313 /*80*/ 0, 3314 0, 3315 0, 3316 /*83*/ MatLoad_MPIAIJ, 3317 0, 3318 0, 3319 0, 3320 0, 3321 0, 3322 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3323 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3324 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3325 MatPtAP_MPIAIJ_MPIAIJ, 3326 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3327 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3328 0, 3329 0, 3330 0, 3331 0, 3332 /*99*/ 0, 3333 0, 3334 0, 3335 MatConjugate_MPIAIJ, 3336 0, 3337 /*104*/MatSetValuesRow_MPIAIJ, 3338 MatRealPart_MPIAIJ, 3339 MatImaginaryPart_MPIAIJ, 3340 0, 3341 0, 3342 /*109*/0, 3343 MatGetRedundantMatrix_MPIAIJ, 3344 MatGetRowMin_MPIAIJ, 3345 0, 3346 0, 3347 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3348 0, 3349 0, 3350 0, 3351 0, 3352 /*119*/0, 3353 0, 3354 0, 3355 0, 3356 MatGetMultiProcBlock_MPIAIJ, 3357 /*124*/MatFindNonzeroRows_MPIAIJ, 3358 MatGetColumnNorms_MPIAIJ, 3359 MatInvertBlockDiagonal_MPIAIJ, 3360 0, 3361 MatGetSubMatricesParallel_MPIAIJ, 3362 /*129*/0, 3363 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3364 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3365 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3366 0, 3367 /*134*/0, 3368 0, 3369 0, 3370 0, 3371 0, 3372 /*139*/0, 3373 0, 3374 0 3375 }; 3376 3377 /* ----------------------------------------------------------------------------------------*/ 3378 3379 #undef __FUNCT__ 3380 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3381 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3382 { 3383 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3384 PetscErrorCode ierr; 3385 3386 PetscFunctionBegin; 3387 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3388 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3389 PetscFunctionReturn(0); 3390 } 3391 3392 #undef __FUNCT__ 3393 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3394 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3395 { 3396 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3397 PetscErrorCode ierr; 3398 3399 PetscFunctionBegin; 3400 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3401 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3402 PetscFunctionReturn(0); 3403 } 3404 3405 #undef __FUNCT__ 3406 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3407 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3408 { 3409 Mat_MPIAIJ *b; 3410 PetscErrorCode ierr; 3411 3412 PetscFunctionBegin; 3413 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3414 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3415 b = (Mat_MPIAIJ*)B->data; 3416 3417 if (!B->preallocated) { 3418 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3419 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3420 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3421 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3422 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3423 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3424 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3425 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3426 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3427 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3428 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3429 } 3430 3431 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3432 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3433 B->preallocated = PETSC_TRUE; 3434 PetscFunctionReturn(0); 3435 } 3436 3437 #undef __FUNCT__ 3438 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3439 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3440 { 3441 Mat mat; 3442 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3443 PetscErrorCode ierr; 3444 3445 PetscFunctionBegin; 3446 *newmat = 0; 3447 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3448 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3449 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3450 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3451 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3452 a = (Mat_MPIAIJ*)mat->data; 3453 3454 mat->factortype = matin->factortype; 3455 mat->rmap->bs = matin->rmap->bs; 3456 mat->cmap->bs = matin->cmap->bs; 3457 mat->assembled = PETSC_TRUE; 3458 mat->insertmode = NOT_SET_VALUES; 3459 mat->preallocated = PETSC_TRUE; 3460 3461 a->size = oldmat->size; 3462 a->rank = oldmat->rank; 3463 a->donotstash = oldmat->donotstash; 3464 a->roworiented = oldmat->roworiented; 3465 a->rowindices = 0; 3466 a->rowvalues = 0; 3467 a->getrowactive = PETSC_FALSE; 3468 3469 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3470 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3471 3472 if (oldmat->colmap) { 3473 #if defined(PETSC_USE_CTABLE) 3474 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3475 #else 3476 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3477 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3478 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3479 #endif 3480 } else a->colmap = 0; 3481 if (oldmat->garray) { 3482 PetscInt len; 3483 len = oldmat->B->cmap->n; 3484 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3485 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3486 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3487 } else a->garray = 0; 3488 3489 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3490 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3491 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3492 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3493 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3494 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3495 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3496 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3497 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3498 *newmat = mat; 3499 PetscFunctionReturn(0); 3500 } 3501 3502 3503 3504 #undef __FUNCT__ 3505 #define __FUNCT__ "MatLoad_MPIAIJ" 3506 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3507 { 3508 PetscScalar *vals,*svals; 3509 MPI_Comm comm; 3510 PetscErrorCode ierr; 3511 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3512 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3513 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3514 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3515 PetscInt cend,cstart,n,*rowners,sizesset=1; 3516 int fd; 3517 PetscInt bs = 1; 3518 3519 PetscFunctionBegin; 3520 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3521 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3522 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3523 if (!rank) { 3524 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3525 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3526 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3527 } 3528 3529 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3530 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3531 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3532 3533 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3534 3535 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3536 M = header[1]; N = header[2]; 3537 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3538 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3539 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3540 3541 /* If global sizes are set, check if they are consistent with that given in the file */ 3542 if (sizesset) { 3543 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3544 } 3545 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3546 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3547 3548 /* determine ownership of all (block) rows */ 3549 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3550 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3551 else m = newMat->rmap->n; /* Set by user */ 3552 3553 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3554 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3555 3556 /* First process needs enough room for process with most rows */ 3557 if (!rank) { 3558 mmax = rowners[1]; 3559 for (i=2; i<=size; i++) { 3560 mmax = PetscMax(mmax, rowners[i]); 3561 } 3562 } else mmax = -1; /* unused, but compilers complain */ 3563 3564 rowners[0] = 0; 3565 for (i=2; i<=size; i++) { 3566 rowners[i] += 
rowners[i-1]; 3567 } 3568 rstart = rowners[rank]; 3569 rend = rowners[rank+1]; 3570 3571 /* distribute row lengths to all processors */ 3572 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3573 if (!rank) { 3574 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3575 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3576 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3577 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3578 for (j=0; j<m; j++) { 3579 procsnz[0] += ourlens[j]; 3580 } 3581 for (i=1; i<size; i++) { 3582 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3583 /* calculate the number of nonzeros on each processor */ 3584 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3585 procsnz[i] += rowlengths[j]; 3586 } 3587 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3588 } 3589 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3590 } else { 3591 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3592 } 3593 3594 if (!rank) { 3595 /* determine max buffer needed and allocate it */ 3596 maxnz = 0; 3597 for (i=0; i<size; i++) { 3598 maxnz = PetscMax(maxnz,procsnz[i]); 3599 } 3600 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3601 3602 /* read in my part of the matrix column indices */ 3603 nz = procsnz[0]; 3604 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3605 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3606 3607 /* read in every one elses and ship off */ 3608 for (i=1; i<size; i++) { 3609 nz = procsnz[i]; 3610 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3611 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3612 } 3613 ierr = PetscFree(cols);CHKERRQ(ierr); 3614 } else { 3615 /* determine buffer space needed for message */ 3616 nz = 0; 3617 for (i=0; i<m; i++) { 3618 nz += ourlens[i]; 3619 } 3620 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3621 3622 /* receive message of column indices*/ 3623 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3624 } 3625 3626 /* determine column ownership if matrix is not square */ 3627 if (N != M) { 3628 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3629 else n = newMat->cmap->n; 3630 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3631 cstart = cend - n; 3632 } else { 3633 cstart = rstart; 3634 cend = rend; 3635 n = cend - cstart; 3636 } 3637 3638 /* loop over local rows, determining number of off diagonal entries */ 3639 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3640 jj = 0; 3641 for (i=0; i<m; i++) { 3642 for (j=0; j<ourlens[i]; j++) { 3643 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3644 jj++; 3645 } 3646 } 3647 3648 for (i=0; i<m; i++) { 3649 ourlens[i] -= offlens[i]; 3650 } 3651 if (!sizesset) { 3652 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3653 } 3654 3655 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3656 3657 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3658 3659 for (i=0; i<m; i++) { 3660 ourlens[i] += offlens[i]; 3661 } 3662 3663 if (!rank) { 3664 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3665 3666 /* read in my part of the matrix numerical values */ 3667 nz = procsnz[0]; 3668 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3669 3670 /* insert into matrix */ 3671 jj = rstart; 3672 
  smycols = mycols;
  svals   = vals;
  for (i=0; i<m; i++) {
    ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
    smycols += ourlens[i];
    svals   += ourlens[i];
    jj++;
  }

  /* read in the other processors' values and ship them out */
  for (i=1; i<size; i++) {
    nz   = procsnz[i];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(procsnz);CHKERRQ(ierr);
} else {
  /* receive numeric values */
  ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

  /* receive message of values */
  ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

  /* insert into matrix */
  jj      = rstart;
  smycols = mycols;
  svals   = vals;
  for (i=0; i<m; i++) {
    ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
    smycols += ourlens[i];
    svals   += ourlens[i];
    jj++;
  }
}
ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
ierr = PetscFree(vals);CHKERRQ(ierr);
ierr = PetscFree(mycols);CHKERRQ(ierr);
ierr = PetscFree(rowners);CHKERRQ(ierr);
ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first each process extracts
  its piece as a local SeqAIJ matrix, and then the local pieces are concatenated into
  the final result. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
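    The csize argument is the local column size of the resulting matrix; when csize
  is PETSC_DECIDE a suitable local column size is computed below.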
3751 */ 3752 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3753 { 3754 PetscErrorCode ierr; 3755 PetscMPIInt rank,size; 3756 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3757 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3758 PetscBool allcolumns, colflag; 3759 Mat M,Mreuse; 3760 MatScalar *vwork,*aa; 3761 MPI_Comm comm; 3762 Mat_SeqAIJ *aij; 3763 3764 PetscFunctionBegin; 3765 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3766 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3767 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3768 3769 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3770 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3771 if (colflag && ncol == mat->cmap->N) { 3772 allcolumns = PETSC_TRUE; 3773 } else { 3774 allcolumns = PETSC_FALSE; 3775 } 3776 if (call == MAT_REUSE_MATRIX) { 3777 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3778 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3779 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3780 } else { 3781 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3782 } 3783 3784 /* 3785 m - number of local rows 3786 n - number of columns (same on all processors) 3787 rstart - first row in new global matrix generated 3788 */ 3789 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3790 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3791 if (call == MAT_INITIAL_MATRIX) { 3792 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3793 ii = aij->i; 3794 jj = aij->j; 3795 3796 /* 3797 Determine the number of non-zeros in the diagonal and off-diagonal 3798 portions of the matrix in order to do correct preallocation 3799 */ 3800 3801 /* first get start and end of "diagonal" columns */ 3802 if (csize == PETSC_DECIDE) { 3803 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3804 if (mglobal == n) { /* square matrix */ 3805 nlocal = m; 3806 } else { 3807 nlocal = n/size + ((n % size) > rank); 3808 } 3809 } else { 3810 nlocal = csize; 3811 } 3812 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3813 rstart = rend - nlocal; 3814 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3815 3816 /* next, compute all the lengths */ 3817 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3818 olens = dlens + m; 3819 for (i=0; i<m; i++) { 3820 jend = ii[i+1] - ii[i]; 3821 olen = 0; 3822 dlen = 0; 3823 for (j=0; j<jend; j++) { 3824 if (*jj < rstart || *jj >= rend) olen++; 3825 else dlen++; 3826 jj++; 3827 } 3828 olens[i] = olen; 3829 dlens[i] = dlen; 3830 } 3831 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3832 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3833 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3834 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3835 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3836 ierr = PetscFree(dlens);CHKERRQ(ierr); 3837 } else { 3838 PetscInt ml,nl; 3839 3840 M = *newmat; 3841 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3842 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3843 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3844 /* 
      The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
  */
  M->was_assembled = PETSC_TRUE;
  M->assembled     = PETSC_FALSE;
}
ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
aij  = (Mat_SeqAIJ*)(Mreuse)->data;
ii   = aij->i;
jj   = aij->j;
aa   = aij->a;
for (i=0; i<m; i++) {
  row   = rstart + i;
  nz    = ii[i+1] - ii[i];
  cwork = jj; jj += nz;
  vwork = aa; aa += nz;
  ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
}

ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*newmat = M;

/* save submatrix used in processor for next request */
if (call == MAT_INITIAL_MATRIX) {
  ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
  ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
}
PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  /* fixed: the macro is PETSC_USE_DEBUG, the error macros are SETERRQn() (with a
     communicator argument), and the last check was missing a closing parenthesis */
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the entries of i are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
   local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
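
       A minimal calling sketch (a sketch only; it assumes m and n are the local sizes
     and that the CSR arrays i, j, and a already hold the locally owned rows):

.vb
     Mat A;
     ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
     /* ... use A ... */
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve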
       The i and j indices are 0 based, and the entries of i are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
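
   A minimal creation sketch for the example above (a sketch only; on each of the
   three processes m, n, d_nnz, and o_nnz hold the per-process values listed above):

.vb
     Mat A;
     ierr = MatCreateAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use A ... */
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve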
4380 4381 Level: intermediate 4382 4383 .keywords: matrix, aij, compressed row, sparse, parallel 4384 4385 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4386 MPIAIJ, MatCreateMPIAIJWithArrays() 4387 @*/ 4388 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4389 { 4390 PetscErrorCode ierr; 4391 PetscMPIInt size; 4392 4393 PetscFunctionBegin; 4394 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4395 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4396 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4397 if (size > 1) { 4398 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4399 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4400 } else { 4401 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4402 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4403 } 4404 PetscFunctionReturn(0); 4405 } 4406 4407 #undef __FUNCT__ 4408 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4409 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4410 { 4411 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4412 4413 PetscFunctionBegin; 4414 *Ad = a->A; 4415 *Ao = a->B; 4416 *colmap = a->garray; 4417 PetscFunctionReturn(0); 4418 } 4419 4420 #undef __FUNCT__ 4421 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4422 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4423 { 4424 PetscErrorCode ierr; 4425 PetscInt i; 4426 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4427 4428 PetscFunctionBegin; 4429 if (coloring->ctype == IS_COLORING_GLOBAL) { 4430 ISColoringValue *allcolors,*colors; 4431 ISColoring ocoloring; 4432 4433 /* set coloring for diagonal portion */ 4434 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4435 4436 /* set coloring for off-diagonal portion */ 4437 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4438 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4439 for (i=0; i<a->B->cmap->n; i++) { 4440 colors[i] = allcolors[a->garray[i]]; 4441 } 4442 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4443 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4444 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4445 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4446 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4447 ISColoringValue *colors; 4448 PetscInt *larray; 4449 ISColoring ocoloring; 4450 4451 /* set coloring for diagonal portion */ 4452 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4453 for (i=0; i<a->A->cmap->n; i++) { 4454 larray[i] = i + A->cmap->rstart; 4455 } 4456 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4457 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4458 for (i=0; i<a->A->cmap->n; i++) { 4459 colors[i] = coloring->colors[larray[i]]; 4460 } 4461 ierr = PetscFree(larray);CHKERRQ(ierr); 4462 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4463 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4464 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4465 4466 /* set coloring for off-diagonal portion */ 4467 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4468 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4469 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4470 for (i=0; i<a->B->cmap->n; i++) { 4471 colors[i] = coloring->colors[larray[i]]; 4472 } 4473 ierr = PetscFree(larray);CHKERRQ(ierr); 4474 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4475 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4476 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4477 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4478 PetscFunctionReturn(0); 4479 } 4480 4481 #undef __FUNCT__ 4482 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4483 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4484 { 4485 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4486 PetscErrorCode ierr; 4487 4488 PetscFunctionBegin; 4489 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4490 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4491 PetscFunctionReturn(0); 4492 } 4493 4494 #undef __FUNCT__ 4495 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4496 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4497 { 4498 PetscErrorCode ierr; 4499 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4500 PetscInt *indx; 4501 4502 PetscFunctionBegin; 4503 /* This routine will ONLY return MPIAIJ type matrix */ 4504 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4505 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4506 if (n == PETSC_DECIDE) { 4507 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4508 } 4509 /* Check sum(n) = N */ 4510 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4511 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4512 4513 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4514 rstart -= m; 4515 4516 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4517 for (i=0; i<m; i++) { 4518 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4519 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4520 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4521 } 4522 4523 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4524 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4525 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4526 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4527 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4528 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4529 PetscFunctionReturn(0); 4530 } 4531 4532 #undef __FUNCT__ 4533 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4534 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4535 { 4536 PetscErrorCode ierr; 4537 PetscInt m,N,i,rstart,nnz,Ii; 4538 PetscInt *indx; 4539 PetscScalar *values; 4540 4541 PetscFunctionBegin; 4542 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4543 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4544 for (i=0; i<m; i++) { 4545 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4546 Ii = i + rstart; 4547 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4548 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
/*@
      MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix on each process
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH processor MUST be the same.

@*/
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (size == 1) {
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  } else {
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
    }
    ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? 
*/ 4619 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4620 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4621 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4622 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4623 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4624 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4625 for (i=0; i<m; i++) { 4626 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4627 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4628 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4629 } 4630 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4631 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4632 4633 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4634 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4635 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4636 sprintf(name,"%s.%d",outfile,rank); 4637 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4638 ierr = PetscFree(name);CHKERRQ(ierr); 4639 ierr = MatView(B,out);CHKERRQ(ierr); 4640 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4641 ierr = MatDestroy(&B);CHKERRQ(ierr); 4642 PetscFunctionReturn(0); 4643 } 4644 4645 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4646 #undef __FUNCT__ 4647 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4648 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4649 { 4650 PetscErrorCode ierr; 4651 Mat_Merge_SeqsToMPI *merge; 4652 PetscContainer container; 4653 4654 PetscFunctionBegin; 4655 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4656 if (container) { 4657 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4658 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4659 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4660 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4661 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4662 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4663 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4664 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4665 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4666 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4667 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4668 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4669 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4670 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4671 ierr = PetscFree(merge);CHKERRQ(ierr); 4672 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4673 } 4674 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4675 PetscFunctionReturn(0); 4676 } 4677 4678 #include <../src/mat/utils/freespace.h> 4679 #include <petscbt.h> 4680 4681 #undef __FUNCT__ 4682 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4683 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4684 { 4685 PetscErrorCode ierr; 4686 MPI_Comm comm; 4687 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4688 PetscMPIInt size,rank,taga,*len_s; 4689 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4690 PetscInt proc,m; 4691 PetscInt **buf_ri,**buf_rj; 4692 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4693 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4694 MPI_Request *s_waits,*r_waits; 4695 MPI_Status *status; 4696 MatScalar *aa=a->a; 4697 MatScalar **abuf_r,*ba_i; 4698 Mat_Merge_SeqsToMPI *merge; 4699 PetscContainer container; 4700 4701 PetscFunctionBegin; 4702 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr =
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4793 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4794 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4795 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4796 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4797 PetscFunctionReturn(0); 4798 } 4799 4800 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4801 4802 #undef __FUNCT__ 4803 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4804 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4805 { 4806 PetscErrorCode ierr; 4807 Mat B_mpi; 4808 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4809 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4810 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4811 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4812 PetscInt len,proc,*dnz,*onz,bs,cbs; 4813 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4814 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4815 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4816 MPI_Status *status; 4817 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4818 PetscBT lnkbt; 4819 Mat_Merge_SeqsToMPI *merge; 4820 PetscContainer container; 4821 4822 PetscFunctionBegin; 4823 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4824 4825 /* make sure it is a PETSc comm */ 4826 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4827 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4828 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4829 4830 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4831 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4832 4833 /* determine row ownership */ 4834 /*---------------------------------------------------------*/ 4835 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4836 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4837 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4838 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4839 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4840 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4841 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4842 4843 m = merge->rowmap->n; 4844 owners = merge->rowmap->range; 4845 4846 /* determine the number of messages to send, their lengths */ 4847 /*---------------------------------------------------------*/ 4848 len_s = merge->len_s; 4849 4850 len = 0; /* length of buf_si[] */ 4851 merge->nsend = 0; 4852 for (proc=0; proc<size; proc++) { 4853 len_si[proc] = 0; 4854 if (proc == rank) { 4855 len_s[proc] = 0; 4856 } else { 4857 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4858 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4859 } 4860 if (len_s[proc]) { 4861 merge->nsend++; 4862 nrows = 0; 4863 for (i=owners[proc]; i<owners[proc+1]; i++) { 4864 if (ai[i+1] > ai[i]) nrows++; 4865 } 4866 len_si[proc] = 2*(nrows+1); 4867 len += len_si[proc]; 4868 } 4869 } 4870 4871 /* determine the number and length of messages to receive for ij-structure */ 4872 /*-------------------------------------------------------------------------*/ 4873 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4874 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4875 4876 /* post the Irecv of j-structure */ 4877 
/*-------------------------------*/ 4878 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4879 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4880 4881 /* post the Isend of j-structure */ 4882 /*--------------------------------*/ 4883 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4884 4885 for (proc=0, k=0; proc<size; proc++) { 4886 if (!len_s[proc]) continue; 4887 i = owners[proc]; 4888 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4889 k++; 4890 } 4891 4892 /* receives and sends of j-structure are complete */ 4893 /*------------------------------------------------*/ 4894 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4895 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4896 4897 /* send and recv i-structure */ 4898 /*---------------------------*/ 4899 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4900 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4901 4902 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4903 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4904 for (proc=0,k=0; proc<size; proc++) { 4905 if (!len_s[proc]) continue; 4906 /* form outgoing message for i-structure: 4907 buf_si[0]: nrows to be sent 4908 [1:nrows]: row index (global) 4909 [nrows+1:2*nrows+1]: i-structure index 4910 */ 4911 /*-------------------------------------------*/ 4912 nrows = len_si[proc]/2 - 1; 4913 buf_si_i = buf_si + nrows+1; 4914 buf_si[0] = nrows; 4915 buf_si_i[0] = 0; 4916 nrows = 0; 4917 for (i=owners[proc]; i<owners[proc+1]; i++) { 4918 anzi = ai[i+1] - ai[i]; 4919 if (anzi) { 4920 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4921 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4922 nrows++; 4923 } 4924 } 4925 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4926 k++; 4927 buf_si += len_si[proc]; 4928 } 4929 4930 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4931 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4932 4933 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4934 for (i=0; i<merge->nrecv; i++) { 4935 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4936 } 4937 4938 ierr = PetscFree(len_si);CHKERRQ(ierr); 4939 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4940 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4941 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4942 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4943 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4944 ierr = PetscFree(status);CHKERRQ(ierr); 4945 4946 /* compute a local seq matrix in each processor */ 4947 /*----------------------------------------------*/ 4948 /* allocate bi array and free space for accumulating nonzero column info */ 4949 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4950 bi[0] = 0; 4951 4952 /* create and initialize a linked list */ 4953 nlnk = N+1; 4954 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4955 4956 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4957 len = ai[owners[rank+1]] - ai[owners[rank]]; 4958 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4959 4960 
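  /* Note: 2*nnz(local seqmat) is only an initial guess for the merged nonzero
     pattern; if a merged row outgrows the remaining space, PetscFreeSpaceGet()
     in the loop below chains on another block (nspacedouble counts how often). */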
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each process
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize().

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5367 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5368 5369 Level: developer 5370 5371 */ 5372 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5373 { 5374 VecScatter_MPI_General *gen_to,*gen_from; 5375 PetscErrorCode ierr; 5376 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5377 Mat_SeqAIJ *b_oth; 5378 VecScatter ctx =a->Mvctx; 5379 MPI_Comm comm; 5380 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5381 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5382 PetscScalar *rvalues,*svalues; 5383 MatScalar *b_otha,*bufa,*bufA; 5384 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5385 MPI_Request *rwaits = NULL,*swaits = NULL; 5386 MPI_Status *sstatus,rstatus; 5387 PetscMPIInt jj; 5388 PetscInt *cols,sbs,rbs; 5389 PetscScalar *vals; 5390 5391 PetscFunctionBegin; 5392 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5393 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5394 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5395 } 5396 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5397 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5398 5399 gen_to = (VecScatter_MPI_General*)ctx->todata; 5400 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5401 rvalues = gen_from->values; /* holds the length of receiving row */ 5402 svalues = gen_to->values; /* holds the length of sending row */ 5403 nrecvs = gen_from->n; 5404 nsends = gen_to->n; 5405 5406 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5407 srow = gen_to->indices; /* local row index to be sent */ 5408 sstarts = gen_to->starts; 5409 sprocs = gen_to->procs; 5410 sstatus = gen_to->sstatus; 5411 sbs = gen_to->bs; 5412 rstarts = gen_from->starts; 5413 rprocs = gen_from->procs; 5414 rbs = gen_from->bs; 5415 5416 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5417 if (scall == MAT_INITIAL_MATRIX) { 5418 /* i-array */ 5419 /*---------*/ 5420 /* post receives */ 5421 for (i=0; i<nrecvs; i++) { 5422 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5423 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5424 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5425 } 5426 5427 /* pack the outgoing message */ 5428 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5429 5430 sstartsj[0] = 0; 5431 rstartsj[0] = 0; 5432 len = 0; /* total length of j or a array to be sent */ 5433 k = 0; 5434 for (i=0; i<nsends; i++) { 5435 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5436 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5437 for (j=0; j<nrows; j++) { 5438 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5439 for (l=0; l<sbs; l++) { 5440 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5441 5442 rowlen[j*sbs+l] = ncols; 5443 5444 len += ncols; 5445 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5446 } 5447 k++; 5448 } 5449 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5450 5451 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5545 } 5546 } 5547 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5548 } 5549 /* recvs and sends of a-array are completed */ 5550 i = nrecvs; 5551 while (i--) { 5552 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5553 } 5554 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5555 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5556 5557 if (scall == MAT_INITIAL_MATRIX) { 5558 /* put together the new matrix */ 5559 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5560 5561 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5562 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5563 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5564 b_oth->free_a = PETSC_TRUE; 5565 b_oth->free_ij = PETSC_TRUE; 5566 b_oth->nonew = 0; 5567 5568 ierr = PetscFree(bufj);CHKERRQ(ierr); 5569 if (!startsj_s || !bufa_ptr) { 5570 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5571 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5572 } else { 5573 *startsj_s = sstartsj; 5574 *startsj_r = rstartsj; 5575 *bufa_ptr = bufa; 5576 } 5577 } 5578 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5579 PetscFunctionReturn(0); 5580 } 5581 5582 #undef __FUNCT__ 5583 #define __FUNCT__ "MatGetCommunicationStructs" 5584 /*@C 5585 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5586 5587 Not Collective 5588 5589 Input Parameters: 5590 . A - The matrix in mpiaij format 5591 5592 Output Parameter: 5593 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5594 . 
colmap - A map from global column index to local index into lvec 5595 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5596 5597 Level: developer 5598 5599 @*/ 5600 #if defined(PETSC_USE_CTABLE) 5601 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5602 #else 5603 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5604 #endif 5605 { 5606 Mat_MPIAIJ *a; 5607 5608 PetscFunctionBegin; 5609 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5610 PetscValidPointer(lvec, 2); 5611 PetscValidPointer(colmap, 3); 5612 PetscValidPointer(multScatter, 4); 5613 a = (Mat_MPIAIJ*) A->data; 5614 if (lvec) *lvec = a->lvec; 5615 if (colmap) *colmap = a->colmap; 5616 if (multScatter) *multScatter = a->Mvctx; 5617 PetscFunctionReturn(0); 5618 } 5619 5620 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5621 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5622 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5623 5624 #undef __FUNCT__ 5625 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5626 /* 5627 Computes (B'*A')' since computing B*A directly is untenable 5628 5629 n p p 5630 ( ) ( ) ( ) 5631 m ( A ) * n ( B ) = m ( C ) 5632 ( ) ( ) ( ) 5633 5634 */ 5635 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5636 { 5637 PetscErrorCode ierr; 5638 Mat At,Bt,Ct; 5639 5640 PetscFunctionBegin; 5641 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5642 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5643 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5644 ierr = MatDestroy(&At);CHKERRQ(ierr); 5645 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5646 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5647 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5648 PetscFunctionReturn(0); 5649 } 5650 5651 #undef __FUNCT__ 5652 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5653 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5654 { 5655 PetscErrorCode ierr; 5656 PetscInt m=A->rmap->n,n=B->cmap->n; 5657 Mat Cmat; 5658 5659 PetscFunctionBegin; 5660 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5661 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5662 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5663 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5664 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5665 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5666 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5667 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5668 5669 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5670 5671 *C = Cmat; 5672 PetscFunctionReturn(0); 5673 } 5674 5675 /* ----------------------------------------------------------------*/ 5676 #undef __FUNCT__ 5677 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5678 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5679 { 5680 PetscErrorCode ierr; 5681 5682 PetscFunctionBegin; 5683 if (scall == MAT_INITIAL_MATRIX) { 5684 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5685 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5686 ierr = 
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5687 } 5688 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5689 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5690 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5691 PetscFunctionReturn(0); 5692 } 5693 5694 #if defined(PETSC_HAVE_MUMPS) 5695 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5696 #endif 5697 #if defined(PETSC_HAVE_PASTIX) 5698 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5699 #endif 5700 #if defined(PETSC_HAVE_SUPERLU_DIST) 5701 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5702 #endif 5703 #if defined(PETSC_HAVE_CLIQUE) 5704 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5705 #endif 5706 5707 /*MC 5708 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5709 5710 Options Database Keys: 5711 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5712 5713 Level: beginner 5714 5715 .seealso: MatCreateAIJ() 5716 M*/ 5717 5718 #undef __FUNCT__ 5719 #define __FUNCT__ "MatCreate_MPIAIJ" 5720 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5721 { 5722 Mat_MPIAIJ *b; 5723 PetscErrorCode ierr; 5724 PetscMPIInt size; 5725 5726 PetscFunctionBegin; 5727 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5728 5729 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5730 B->data = (void*)b; 5731 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5732 B->assembled = PETSC_FALSE; 5733 B->insertmode = NOT_SET_VALUES; 5734 b->size = size; 5735 5736 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5737 5738 /* build cache for off array entries formed */ 5739 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5740 5741 b->donotstash = PETSC_FALSE; 5742 b->colmap = 0; 5743 b->garray = 0; 5744 b->roworiented = PETSC_TRUE; 5745 5746 /* stuff used for matrix vector multiply */ 5747 b->lvec = NULL; 5748 b->Mvctx = NULL; 5749 5750 /* stuff for MatGetRow() */ 5751 b->rowindices = 0; 5752 b->rowvalues = 0; 5753 b->getrowactive = PETSC_FALSE; 5754 5755 /* flexible pointer used in CUSP/CUSPARSE classes */ 5756 b->spptr = NULL; 5757 5758 #if defined(PETSC_HAVE_MUMPS) 5759 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5760 #endif 5761 #if defined(PETSC_HAVE_PASTIX) 5762 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5763 #endif 5764 #if defined(PETSC_HAVE_SUPERLU_DIST) 5765 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5766 #endif 5767 #if defined(PETSC_HAVE_CLIQUE) 5768 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5769 #endif 5770 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5771 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5772 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5773 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5774 
ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5775 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5776 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5777 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5778 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5779 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5780 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5781 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5782 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5783 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5784 PetscFunctionReturn(0); 5785 } 5786 5787 #undef __FUNCT__ 5788 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5789 /*@ 5790 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5791 and "off-diagonal" part of the matrix in CSR format. 5792 5793 Collective on MPI_Comm 5794 5795 Input Parameters: 5796 + comm - MPI communicator 5797 . m - number of local rows (Cannot be PETSC_DECIDE) 5798 . n - This value should be the same as the local size used in creating the 5799 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5800 calculated if N is given) For square matrices n is almost always m. 5801 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5802 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5803 . i - row indices for "diagonal" portion of matrix 5804 . j - column indices 5805 . a - matrix values 5806 . oi - row indices for "off-diagonal" portion of matrix 5807 . oj - column indices 5808 - oa - matrix values 5809 5810 Output Parameter: 5811 . mat - the matrix 5812 5813 Level: advanced 5814 5815 Notes: 5816 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5817 must free the arrays once the matrix has been destroyed and not before. 5818 5819 The i and j indices are 0 based 5820 5821 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5822 5823 This sets local rows and cannot be used to set off-processor values. 5824 5825 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5826 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5827 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5828 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5829 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5830 communication if it is known that only local entries will be set. 
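
       As a hedged illustration (made-up values; shown for the process owning the first two rows of a
       4x4 matrix distributed over two processes, so its diagonal block is columns 0 and 1; per the
       implementation below, j holds column indices within the diagonal block while oj holds global
       column indices):

.vb
       /* local rows:  [ 1 0 | 2 0 ]
                       [ 0 3 | 0 4 ]                                      */
       PetscInt    i[]  = {0,1,2}, j[]  = {0,1};   /* diagonal block      */
       PetscScalar a[]  = {1.0,3.0};
       PetscInt    oi[] = {0,1,2}, oj[] = {2,3};   /* off-diagonal block  */
       PetscScalar oa[] = {2.0,4.0};
       /* arrays are not copied, so they must outlive the matrix */
       MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&A);
.ve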
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for the "diagonal" portion of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for the "off-diagonal" portion of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must not free the arrays until the matrix has been destroyed.

       The i and j indices are 0-based.

       See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

       This routine sets local rows only and cannot be used to set off-process values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* the "diagonal" block uses local column indices; the "off-diagonal" block is
     created over the full global column space and is compacted during assembly */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
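/*
   Example usage (a minimal sketch, assuming exactly two processes; all sizes and
   values are illustrative).  Each process owns 2 rows and 2 columns of a 4x4
   matrix; "diagonal" column indices are local, "off-diagonal" column indices
   are global:

       PetscInt    i[3],j[2],oi[3],oj[1];
       PetscScalar a[2],oa[1];
       Mat         A;

       on rank 0:  i  = {0,1,2};  j  = {0,1};  a  = {1,1};
                   oi = {0,0,1};  oj = {2};    oa = {3};    [entry (1,2) = 3]
       on rank 1:  i  = {0,1,2};  j  = {0,1};  a  = {1,1};
                   oi = {0,1,1};  oj = {1};    oa = {4};    [entry (2,1) = 4]

       MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);

   The arrays are not copied, so they must remain allocated until A has been
   destroyed.
*/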
/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macros */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;    /* refresh ba first so ap2 does not point into the old (freed) array */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
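/*
   matsetvaluesmpiaij_() above lets Fortran code reach the MPIAIJ MatSetValues
   kernel directly, bypassing the function-pointer dispatch of MatSetValues().
   The semantics match the public API; an equivalent C call for one entry
   (illustrative indices and value) is:

       PetscScalar v   = 1.0;
       PetscInt    row = 0,col = 0;
       MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);
*/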