#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically
  switches over to using inodes when enough rows with an identical nonzero pattern exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
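/*
   Usage sketch (illustrative only, not compiled here): creating an AIJ matrix
   that runs unchanged on one or many processes by calling both preallocation
   routines, as recommended above.  M and N are the global sizes and the
   per-row nonzero estimates 5 (diagonal block) and 2 (off-diagonal block) are
   placeholders chosen for the example.

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);            used when size == 1
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);     used when size  > 1
*/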
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
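/*
   Usage sketch (illustrative): the routine above is normally reached through
   the public MatGetColumnNorms() interface; note that norms must have room for
   the GLOBAL number of columns on every process, since the result is
   all-reduced.

       PetscReal *norms;
       PetscInt  N;
       ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
       ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
       ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
       ierr = PetscFree(norms);CHKERRQ(ierr);
*/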
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
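/*
   Usage sketch (illustrative): MatDistribute_MPIAIJ() is typically handed a
   square MATSEQAIJ matrix that exists only on process 0 of comm, with every
   process supplying its desired local row count m.

       Mat gmat = NULL,dmat;
       if (!rank) { build gmat as a square MATSEQAIJ matrix }
       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
*/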
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
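/*
   Worked example of the colmap convention (both branches above store
   local index + 1 so that 0 can mean "column not present"): if
   garray = {3,7,12}, then global column 7 maps to local column 1 and a
   lookup, as done later in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ(),
   reads

   #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
   #else
       lcol = aij->colmap[gcol] - 1;
   #endif

   where lcol < 0 signals that global column gcol has no entry in the
   off-diagonal block on this process. (gcol and lcol are illustrative names.)
*/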
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2)  low2 = 0;     \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
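/*
   Usage sketch (illustrative): the standard insertion loop that drives the
   routine above.  Entries destined for rows owned by other processes are
   stashed locally and only communicated during assembly.

       for (each locally computed entry (row,col,val)) {
         ierr = MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/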
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
process zero row"); 794 nrecvs = nsends; 795 nmax = N; 796 } else { 797 /* inform other processors of number of messages and max length*/ 798 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 799 } 800 801 /* post receives: */ 802 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 803 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 804 for (i=0; i<nrecvs; i++) { 805 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 806 } 807 808 /* do sends: 809 1) starts[i] gives the starting index in svalues for stuff going to 810 the ith processor 811 */ 812 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 813 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 814 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 815 816 starts[0] = 0; 817 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 818 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 819 820 starts[0] = 0; 821 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 822 count = 0; 823 for (i=0; i<size; i++) { 824 if (nprocs[2*i+1]) { 825 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 826 } 827 } 828 ierr = PetscFree(starts);CHKERRQ(ierr); 829 830 base = owners[rank]; 831 832 /* wait on receives */ 833 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 834 count = nrecvs; slen = 0; 835 while (count) { 836 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 837 /* unpack receives into our local space */ 838 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 839 840 source[imdex] = recv_status.MPI_SOURCE; 841 lens[imdex] = n; 842 slen += n; 843 count--; 844 } 845 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 846 847 /* move the data into the send scatter */ 848 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 849 count = 0; 850 for (i=0; i<nrecvs; i++) { 851 values = rvalues + i*nmax; 852 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 853 } 854 ierr = PetscFree(rvalues);CHKERRQ(ierr); 855 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 856 ierr = PetscFree(owner);CHKERRQ(ierr); 857 ierr = PetscFree(nprocs);CHKERRQ(ierr); 858 859 /* fix right hand side if needed */ 860 if (x && b) { 861 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 862 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 863 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 864 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 865 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 866 } 867 /* 868 Zero the required rows. If the "diagonal block" of the matrix 869 is square and the user wishes to set the diagonal we use separate 870 code so that MatSetValues() is not called for each diagonal allocating 871 new memory, thus calling lots of mallocs and slowing things down. 
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory for each and hence trigger many
     mallocs, slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
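/*
   Usage sketch (illustrative): applying Dirichlet boundary conditions by
   zeroing the boundary rows, placing 1.0 on the diagonal, and fixing up the
   right-hand side b from a vector xbc of boundary values; nbound, boundrows,
   and xbc are example names. Rows in boundrows may be owned by other
   processes; they are routed to their owners with the send/receive protocol
   implemented above.

       ierr = MatZeroRows(A,nbound,boundrows,1.0,xbc,b);CHKERRQ(ierr);
*/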
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
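/*
   Note on the structure of MatMult_MPIAIJ() above: it overlaps communication
   with computation.  The scatter of the needed ghost entries of xx into
   a->lvec is started, the purely local product with the diagonal block a->A
   proceeds while those messages are in flight, and only then is the scatter
   completed and the off-diagonal block a->B applied, i.e.

       yy = A_diag * xx_local + B_off * xx_ghost

   The same pattern is used by MatMultAdd_MPIAIJ() below.
*/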
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
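/*
   Usage sketch (illustrative): checking whether Bmat equals the transpose of
   Amat to within a tolerance; for parallel AIJ matrices this dispatches to the
   routine above.

       PetscBool flg;
       ierr = MatIsTranspose(Amat,Bmat,1.e-10,&flg);CHKERRQ(ierr);
*/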
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  PetscFunctionReturn(0);
}
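/*
   Usage sketch (illustrative): the routine above is reached by viewing a
   parallel AIJ matrix with a binary viewer; the resulting file can later be
   read back with MatLoad().

       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
       ierr = MatView(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/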
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1456 if (iascii) { 1457 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1458 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1459 MatInfo info; 1460 PetscBool inodes; 1461 1462 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1463 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1465 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1466 if (!inodes) { 1467 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1468 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1469 } else { 1470 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1471 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1472 } 1473 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1475 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1477 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1480 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1483 PetscInt inodecount,inodelimit,*inodes; 1484 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1485 if (inodes) { 1486 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1487 } else { 1488 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1489 } 1490 PetscFunctionReturn(0); 1491 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1492 PetscFunctionReturn(0); 1493 } 1494 } else if (isbinary) { 1495 if (size == 1) { 1496 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1497 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1498 } else { 1499 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } else if (isdraw) { 1503 PetscDraw draw; 1504 PetscBool isnull; 1505 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1506 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1507 } 1508 1509 if (size == 1) { 1510 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1511 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1512 } else { 1513 /* assemble the entire matrix onto first processor. 
*/ 1514 Mat A; 1515 Mat_SeqAIJ *Aloc; 1516 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1517 MatScalar *a; 1518 1519 if (mat->rmap->N > 1024) { 1520 PetscBool flg = PETSC_FALSE; 1521 1522 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1523 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1524 } 1525 1526 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1527 if (!rank) { 1528 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1529 } else { 1530 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1531 } 1532 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1533 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1534 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1535 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1536 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1537 1538 /* copy over the A part */ 1539 Aloc = (Mat_SeqAIJ*)aij->A->data; 1540 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1541 row = mat->rmap->rstart; 1542 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1543 for (i=0; i<m; i++) { 1544 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1545 row++; 1546 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1547 } 1548 aj = Aloc->j; 1549 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1550 1551 /* copy over the B part */ 1552 Aloc = (Mat_SeqAIJ*)aij->B->data; 1553 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1554 row = mat->rmap->rstart; 1555 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1556 ct = cols; 1557 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1558 for (i=0; i<m; i++) { 1559 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1560 row++; 1561 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1562 } 1563 ierr = PetscFree(ct);CHKERRQ(ierr); 1564 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1565 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 /* 1567 Everyone has to call to draw the matrix since the graphics waits are 1568 synchronized across all processors that share the PetscDraw object 1569 */ 1570 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1571 if (!rank) { 1572 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1573 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1574 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1575 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1576 } 1577 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1578 ierr = MatDestroy(&A);CHKERRQ(ierr); 1579 } 1580 PetscFunctionReturn(0); 1581 } 1582 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatView_MPIAIJ" 1585 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1586 { 1587 PetscErrorCode ierr; 1588 PetscBool iascii,isdraw,issocket,isbinary; 1589 1590 PetscFunctionBegin; 1591 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1592 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1595 if (iascii || isdraw || isbinary || issocket) { 1596 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1597 } 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatSOR_MPIAIJ" 1603 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1604 { 1605 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1606 PetscErrorCode ierr; 1607 Vec bb1 = 0; 1608 PetscBool hasop; 1609 1610 PetscFunctionBegin; 1611 if (flag == SOR_APPLY_UPPER) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1617 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1618 } 1619 1620 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1621 if (flag & SOR_ZERO_INITIAL_GUESS) { 1622 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1623 its--; 1624 } 1625 1626 while (its--) { 1627 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 1630 /* update rhs: bb1 = bb - B*x */ 1631 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1632 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1633 1634 /* local sweep */ 1635 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1636 } 1637 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1638 if (flag & SOR_ZERO_INITIAL_GUESS) { 1639 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1640 its--; 1641 } 1642 while (its--) { 1643 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 1646 /* update rhs: bb1 = bb - B*x */ 1647 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1648 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1649 1650 /* local sweep */ 1651 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1652 } 1653 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1654 if (flag & SOR_ZERO_INITIAL_GUESS) { 1655 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1656 its--; 1657 } 1658 while (its--) { 1659 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 1662 /* update rhs: bb1 = bb - B*x */ 1663 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1664 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1665 1666 /* local sweep */ 1667 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1668 } 1669 } else if (flag & SOR_EISENSTAT) { 1670 Vec xx1; 1671 1672 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1673 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1674 
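    /* Eisenstat trick (sketch of the algebra this branch implements): write the local
       diagonal block as Ad = Lhat + Uhat - ((2-omega)/omega)*D, with Lhat = D/omega + L and
       Uhat = D/omega + U.  Then

           Lhat^{-1} Ad Uhat^{-1} y = Uhat^{-1} y + Lhat^{-1} ( y - ((2-omega)/omega) D Uhat^{-1} y )

       so one application of the SSOR-preconditioned operator costs only two triangular
       sweeps: the backward sweep above produced xx = Uhat^{-1} bb, and the forward sweep
       below produces xx1, after bb1 has been set to bb + ((omega-2)/omega) D xx.  The
       MatMultAdd() with mat->B and mat->lvec folds the off-process columns into bb1; sign
       and scaling conventions are those used by PCEisenstat. */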
1675 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1676 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 if (!mat->diag) { 1678 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1679 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1680 } 1681 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1682 if (hasop) { 1683 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1684 } else { 1685 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1686 } 1687 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1688 1689 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1690 1691 /* local sweep */ 1692 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1693 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1694 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1695 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1696 1697 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatPermute_MPIAIJ" 1703 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1704 { 1705 Mat aA,aB,Aperm; 1706 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1707 PetscScalar *aa,*ba; 1708 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1709 PetscSF rowsf,sf; 1710 IS parcolp = NULL; 1711 PetscBool done; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1716 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1717 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1718 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1719 1720 /* Invert row permutation to find out where my rows should go */ 1721 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1722 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1723 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1724 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1725 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1726 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1727 1728 /* Invert column permutation to find out where my columns should go */ 1729 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1730 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1731 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1732 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1733 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1734 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1736 1737 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1738 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1739 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1740 1741 /* Find out where my gcols should go */ 1742 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1743 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1744 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1745 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1746 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1747 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1748 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1750 1751 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1752 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1753 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1756 for (i=0; i<m; i++) { 1757 PetscInt row = rdest[i],rowner; 1758 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1759 for (j=ai[i]; j<ai[i+1]; j++) { 1760 PetscInt cowner,col = cdest[aj[j]]; 1761 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1762 if (rowner == cowner) dnnz[i]++; 1763 else onnz[i]++; 1764 } 1765 for (j=bi[i]; j<bi[i+1]; j++) { 1766 PetscInt cowner,col = gcdest[bj[j]]; 1767 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1768 if (rowner == cowner) dnnz[i]++; 1769 else onnz[i]++; 1770 } 1771 } 1772 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1773 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1775 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1779 1780 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1781 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1783 for (i=0; i<m; i++) { 1784 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1785 PetscInt rowlen; 1786 rowlen = ai[i+1] - ai[i]; 1787 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1788 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1789 rowlen = bi[i+1] - bi[i]; 1790 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1791 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1792 } 1793 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1794 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1797 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1799 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1800 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1801 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1802 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1803 *B = Aperm; 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1809 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1810 { 1811 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1812 Mat A = mat->A,B = mat->B; 1813 PetscErrorCode ierr; 1814 PetscReal isend[5],irecv[5]; 1815 1816 PetscFunctionBegin; 1817 
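  /* The local statistics of the diagonal block (A) and the off-diagonal block (B) are summed
     into isend[] below and, for the global flavours, combined across ranks with MPI_Allreduce()
     using MAX or SUM.  Typical caller-side usage (sketch; the variable names are illustrative):

         MatInfo info;
         ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
         ierr = PetscPrintf(comm,"nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);

     MAT_LOCAL reports only this rank's A+B totals, MAT_GLOBAL_MAX the per-rank maximum. */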
info->block_size = 1.0; 1818 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1819 1820 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1821 isend[3] = info->memory; isend[4] = info->mallocs; 1822 1823 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1824 1825 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1826 isend[3] += info->memory; isend[4] += info->mallocs; 1827 if (flag == MAT_LOCAL) { 1828 info->nz_used = isend[0]; 1829 info->nz_allocated = isend[1]; 1830 info->nz_unneeded = isend[2]; 1831 info->memory = isend[3]; 1832 info->mallocs = isend[4]; 1833 } else if (flag == MAT_GLOBAL_MAX) { 1834 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1835 1836 info->nz_used = irecv[0]; 1837 info->nz_allocated = irecv[1]; 1838 info->nz_unneeded = irecv[2]; 1839 info->memory = irecv[3]; 1840 info->mallocs = irecv[4]; 1841 } else if (flag == MAT_GLOBAL_SUM) { 1842 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1843 1844 info->nz_used = irecv[0]; 1845 info->nz_allocated = irecv[1]; 1846 info->nz_unneeded = irecv[2]; 1847 info->memory = irecv[3]; 1848 info->mallocs = irecv[4]; 1849 } 1850 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1851 info->fill_ratio_needed = 0; 1852 info->factor_mallocs = 0; 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatSetOption_MPIAIJ" 1858 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1859 { 1860 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1861 PetscErrorCode ierr; 1862 1863 PetscFunctionBegin; 1864 switch (op) { 1865 case MAT_NEW_NONZERO_LOCATIONS: 1866 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1867 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1868 case MAT_KEEP_NONZERO_PATTERN: 1869 case MAT_NEW_NONZERO_LOCATION_ERR: 1870 case MAT_USE_INODES: 1871 case MAT_IGNORE_ZERO_ENTRIES: 1872 MatCheckPreallocated(A,1); 1873 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1874 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1875 break; 1876 case MAT_ROW_ORIENTED: 1877 a->roworiented = flg; 1878 1879 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1880 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1881 break; 1882 case MAT_NEW_DIAGONALS: 1883 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1884 break; 1885 case MAT_IGNORE_OFF_PROC_ENTRIES: 1886 a->donotstash = flg; 1887 break; 1888 case MAT_SPD: 1889 A->spd_set = PETSC_TRUE; 1890 A->spd = flg; 1891 if (flg) { 1892 A->symmetric = PETSC_TRUE; 1893 A->structurally_symmetric = PETSC_TRUE; 1894 A->symmetric_set = PETSC_TRUE; 1895 A->structurally_symmetric_set = PETSC_TRUE; 1896 } 1897 break; 1898 case MAT_SYMMETRIC: 1899 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1900 break; 1901 case MAT_STRUCTURALLY_SYMMETRIC: 1902 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1903 break; 1904 case MAT_HERMITIAN: 1905 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1906 break; 1907 case MAT_SYMMETRY_ETERNAL: 1908 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1909 break; 1910 default: 1911 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1912 } 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatGetRow_MPIAIJ" 1918 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1919 { 1920 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1921 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1922 PetscErrorCode ierr; 1923 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1924 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1925 PetscInt *cmap,*idx_p; 1926 1927 PetscFunctionBegin; 1928 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1929 mat->getrowactive = PETSC_TRUE; 1930 1931 if (!mat->rowvalues && (idx || v)) { 1932 /* 1933 allocate enough space to hold information from the longest row. 1934 */ 1935 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1936 PetscInt max = 1,tmp; 1937 for (i=0; i<matin->rmap->n; i++) { 1938 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1939 if (max < tmp) max = tmp; 1940 } 1941 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1942 } 1943 1944 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1945 lrow = row - rstart; 1946 1947 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1948 if (!v) {pvA = 0; pvB = 0;} 1949 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1950 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1951 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1952 nztot = nzA + nzB; 1953 1954 cmap = mat->garray; 1955 if (v || idx) { 1956 if (nztot) { 1957 /* Sort by increasing column numbers, assuming A and B already sorted */ 1958 PetscInt imark = -1; 1959 if (v) { 1960 *v = v_p = mat->rowvalues; 1961 for (i=0; i<nzB; i++) { 1962 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1963 else break; 1964 } 1965 imark = i; 1966 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1967 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1968 } 1969 if (idx) { 1970 *idx = idx_p = mat->rowindices; 1971 if (imark > -1) { 1972 for (i=0; i<imark; i++) { 1973 idx_p[i] = cmap[cworkB[i]]; 1974 } 1975 } else { 1976 for (i=0; i<nzB; i++) { 1977 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1978 else break; 1979 } 1980 imark = i; 1981 } 1982 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1983 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1984 } 1985 } else { 1986 if (idx) *idx = 0; 1987 if (v) *v = 0; 1988 } 1989 } 1990 *nz = nztot; 1991 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1992 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1998 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1999 { 2000 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2001 2002 PetscFunctionBegin; 2003 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2004 aij->getrowactive = PETSC_FALSE; 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatNorm_MPIAIJ" 2010 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2011 { 2012 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2013 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2014 PetscErrorCode ierr; 2015 PetscInt i,j,cstart = mat->cmap->rstart; 2016 PetscReal sum = 0.0; 2017 MatScalar *v; 2018 2019 PetscFunctionBegin; 2020 if (aij->size == 1) { 2021 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2022 } else { 2023 if (type == NORM_FROBENIUS) { 2024 v = 
amat->a; 2025 for (i=0; i<amat->nz; i++) { 2026 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2027 } 2028 v = bmat->a; 2029 for (i=0; i<bmat->nz; i++) { 2030 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2031 } 2032 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2033 *norm = PetscSqrtReal(*norm); 2034 } else if (type == NORM_1) { /* max column norm */ 2035 PetscReal *tmp,*tmp2; 2036 PetscInt *jj,*garray = aij->garray; 2037 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2039 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2040 *norm = 0.0; 2041 v = amat->a; jj = amat->j; 2042 for (j=0; j<amat->nz; j++) { 2043 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2044 } 2045 v = bmat->a; jj = bmat->j; 2046 for (j=0; j<bmat->nz; j++) { 2047 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2048 } 2049 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2050 for (j=0; j<mat->cmap->N; j++) { 2051 if (tmp2[j] > *norm) *norm = tmp2[j]; 2052 } 2053 ierr = PetscFree(tmp);CHKERRQ(ierr); 2054 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2055 } else if (type == NORM_INFINITY) { /* max row norm */ 2056 PetscReal ntemp = 0.0; 2057 for (j=0; j<aij->A->rmap->n; j++) { 2058 v = amat->a + amat->i[j]; 2059 sum = 0.0; 2060 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2061 sum += PetscAbsScalar(*v); v++; 2062 } 2063 v = bmat->a + bmat->i[j]; 2064 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2065 sum += PetscAbsScalar(*v); v++; 2066 } 2067 if (sum > ntemp) ntemp = sum; 2068 } 2069 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2070 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatTranspose_MPIAIJ" 2077 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2078 { 2079 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2080 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2081 PetscErrorCode ierr; 2082 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2083 PetscInt cstart = A->cmap->rstart,ncol; 2084 Mat B; 2085 MatScalar *array; 2086 2087 PetscFunctionBegin; 2088 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2089 2090 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2091 ai = Aloc->i; aj = Aloc->j; 2092 bi = Bloc->i; bj = Bloc->j; 2093 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2094 PetscInt *d_nnz,*g_nnz,*o_nnz; 2095 PetscSFNode *oloc; 2096 PETSC_UNUSED PetscSF sf; 2097 2098 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2099 /* compute d_nnz for preallocation */ 2100 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2101 for (i=0; i<ai[ma]; i++) { 2102 d_nnz[aj[i]]++; 2103 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2104 } 2105 /* compute local off-diagonal contributions */ 2106 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2107 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2108 /* map those to global */ 2109 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2110 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2111 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2112 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2113 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2114 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2116 2117 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2118 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2119 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2120 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2121 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2122 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2123 } else { 2124 B = *matout; 2125 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2126 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2127 } 2128 2129 /* copy over the A part */ 2130 array = Aloc->a; 2131 row = A->rmap->rstart; 2132 for (i=0; i<ma; i++) { 2133 ncol = ai[i+1]-ai[i]; 2134 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2135 row++; 2136 array += ncol; aj += ncol; 2137 } 2138 aj = Aloc->j; 2139 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2140 2141 /* copy over the B part */ 2142 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2143 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2144 array = Bloc->a; 2145 row = A->rmap->rstart; 2146 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2147 cols_tmp = cols; 2148 for (i=0; i<mb; i++) { 2149 ncol = bi[i+1]-bi[i]; 2150 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2151 row++; 2152 array += ncol; cols_tmp += ncol; 2153 } 2154 ierr = PetscFree(cols);CHKERRQ(ierr); 2155 2156 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2159 *matout = B; 2160 } else { 2161 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2168 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2169 { 2170 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2171 Mat a = aij->A,b = aij->B; 2172 PetscErrorCode ierr; 2173 PetscInt s1,s2,s3; 2174 2175 PetscFunctionBegin; 2176 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2177 if (rr) { 2178 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2179 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2180 /* Overlap communication with computation. 
*/ 2181 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2182 } 2183 if (ll) { 2184 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2185 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2186 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2187 } 2188 /* scale the diagonal block */ 2189 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2190 2191 if (rr) { 2192 /* Do a scatter end and then right scale the off-diagonal block */ 2193 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2194 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2201 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2208 PetscFunctionReturn(0); 2209 } 2210 2211 #undef __FUNCT__ 2212 #define __FUNCT__ "MatEqual_MPIAIJ" 2213 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2214 { 2215 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2216 Mat a,b,c,d; 2217 PetscBool flg; 2218 PetscErrorCode ierr; 2219 2220 PetscFunctionBegin; 2221 a = matA->A; b = matA->B; 2222 c = matB->A; d = matB->B; 2223 2224 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2225 if (flg) { 2226 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2227 } 2228 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatCopy_MPIAIJ" 2234 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2235 { 2236 PetscErrorCode ierr; 2237 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2238 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2239 2240 PetscFunctionBegin; 2241 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2242 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2243 /* because of the column compression in the off-processor part of the matrix a->B, 2244 the number of columns in a->B and b->B may be different, hence we cannot call 2245 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2246 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2247 then copying the submatrices */ 2248 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2249 } else { 2250 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2251 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatSetUp_MPIAIJ" 2258 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2259 { 2260 PetscErrorCode ierr; 2261 2262 PetscFunctionBegin; 2263 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2269 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2270 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2271 { 2272 PetscInt i,m=Y->rmap->N; 2273 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2274 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2275 const PetscInt *xi = x->i,*yi = y->i; 2276 2277 PetscFunctionBegin; 2278 /* Set the number of nonzeros in the new matrix */ 2279 for (i=0; i<m; i++) { 2280 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2281 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2282 nnz[i] = 0; 2283 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2284 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2285 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2286 nnz[i]++; 2287 } 2288 for (; k<nzy; k++) nnz[i]++; 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatAXPY_MPIAIJ" 2295 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2296 { 2297 PetscErrorCode ierr; 2298 PetscInt i; 2299 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2300 PetscBLASInt bnz,one=1; 2301 Mat_SeqAIJ *x,*y; 2302 2303 PetscFunctionBegin; 2304 if (str == SAME_NONZERO_PATTERN) { 2305 PetscScalar alpha = a; 2306 x = (Mat_SeqAIJ*)xx->A->data; 2307 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2308 y = (Mat_SeqAIJ*)yy->A->data; 2309 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2310 x = (Mat_SeqAIJ*)xx->B->data; 2311 y = (Mat_SeqAIJ*)yy->B->data; 2312 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2313 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2314 } else if (str == SUBSET_NONZERO_PATTERN) { 2315 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2316 2317 x = (Mat_SeqAIJ*)xx->B->data; 2318 y = (Mat_SeqAIJ*)yy->B->data; 2319 if (y->xtoy && y->XtoY != xx->B) { 2320 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2321 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2322 } 2323 if (!y->xtoy) { /* get xtoy */ 2324 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2325 y->XtoY = xx->B; 2326 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2327 } 2328 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2329 } else { 2330 Mat B; 2331 PetscInt *nnz_d,*nnz_o; 2332 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2333 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2334 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2335 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2336 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2337 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2338 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2339 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2341 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2342 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2343 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2344 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2346 } 2347 PetscFunctionReturn(0); 2348 } 2349 2350 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2351 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "MatConjugate_MPIAIJ" 2354 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2355 { 2356 #if defined(PETSC_USE_COMPLEX) 2357 PetscErrorCode ierr; 2358 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2359 2360 PetscFunctionBegin; 2361 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2362 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2363 #else 2364 PetscFunctionBegin; 2365 #endif 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatRealPart_MPIAIJ" 2371 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2372 { 2373 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2374 PetscErrorCode ierr; 2375 2376 PetscFunctionBegin; 2377 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2378 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2384 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2385 { 2386 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2387 PetscErrorCode ierr; 2388 2389 PetscFunctionBegin; 2390 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2391 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #if defined(PETSC_HAVE_PBGL) 2396 2397 #include <boost/parallel/mpi/bsp_process_group.hpp> 2398 #include <boost/graph/distributed/ilu_default_graph.hpp> 2399 #include <boost/graph/distributed/ilu_0_block.hpp> 2400 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2401 #include <boost/graph/distributed/petsc/interface.hpp> 2402 #include <boost/multi_array.hpp> 2403 #include <boost/parallel/distributed_property_map->hpp> 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2407 /* 2408 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2409 */ 2410 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2411 { 2412 namespace petsc = boost::distributed::petsc; 2413 2414 namespace graph_dist = boost::graph::distributed; 2415 using boost::graph::distributed::ilu_default::process_group_type; 2416 using boost::graph::ilu_permuted; 2417 2418 PetscBool row_identity, col_identity; 2419 PetscContainer c; 2420 PetscInt m, n, M, N; 2421 PetscErrorCode ierr; 2422 2423 PetscFunctionBegin; 2424 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2425 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2426 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2427 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2428 2429 process_group_type pg; 2430 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2431 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2432 lgraph_type& level_graph = *lgraph_p; 2433 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2434 2435 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2436 ilu_permuted(level_graph); 2437 2438 /* put together the new matrix */ 2439 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2440 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2441 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2442 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2443 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2444 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2445 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2446 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 2448 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2449 ierr = PetscContainerSetPointer(c, lgraph_p); 2450 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2451 ierr = PetscContainerDestroy(&c); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 #undef __FUNCT__ 2456 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2457 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2458 { 2459 PetscFunctionBegin; 2460 PetscFunctionReturn(0); 2461 } 2462 2463 #undef __FUNCT__ 2464 #define __FUNCT__ "MatSolve_MPIAIJ" 2465 /* 2466 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2467 */ 2468 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2469 { 2470 namespace graph_dist = boost::graph::distributed; 2471 2472 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2473 lgraph_type *lgraph_p; 2474 PetscContainer c; 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2479 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2480 ierr = VecCopy(b, x);CHKERRQ(ierr); 2481 2482 PetscScalar *array_x; 2483 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2484 PetscInt sx; 2485 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2486 2487 PetscScalar *array_b; 2488 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2489 PetscInt sb; 2490 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2491 2492 lgraph_type& level_graph = *lgraph_p; 2493 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2494 2495 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2496 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2497 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2498 2499 typedef boost::iterator_property_map<array_ref_type::iterator, 2500 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2501 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2502 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2503 2504 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2505 PetscFunctionReturn(0); 2506 } 2507 #endif 2508 2509 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2510 PetscInt nzlocal,nsends,nrecvs; 2511 PetscMPIInt *send_rank,*recv_rank; 2512 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2513 PetscScalar *sbuf_a,**rbuf_a; 2514 PetscErrorCode (*Destroy)(Mat); 2515 } Mat_Redundant; 2516 2517 #undef __FUNCT__ 2518 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2519 
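/* Lifecycle of the cached Mat_Redundant data (sketch):

     MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);
         - allocates the send/receive buffers, performs the full (symbolic + numeric)
           exchange, and composes them on C in a PetscContainer keyed "Mat_Redundant"
     MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);
         - pulls the buffers back out of the container and resends only the numerical values
     MatDestroy(&C);
         - MatDestroy_MatRedundant() (installed over C->ops->destroy) runs first, the
           container's user destroy PetscContainerDestroy_MatRedundant() frees the buffers,
           then the original destroy routine is restored and called

   The call sequence above mirrors MatGetRedundantMatrix_MPIAIJ() further below. */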
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2520 { 2521 PetscErrorCode ierr; 2522 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2523 PetscInt i; 2524 2525 PetscFunctionBegin; 2526 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2527 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2528 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2529 for (i=0; i<redund->nrecvs; i++) { 2530 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2531 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2532 } 2533 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2534 ierr = PetscFree(redund);CHKERRQ(ierr); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 #undef __FUNCT__ 2539 #define __FUNCT__ "MatDestroy_MatRedundant" 2540 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2541 { 2542 PetscErrorCode ierr; 2543 PetscContainer container; 2544 Mat_Redundant *redund=NULL; 2545 2546 PetscFunctionBegin; 2547 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2548 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2549 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2550 2551 A->ops->destroy = redund->Destroy; 2552 2553 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2554 if (A->ops->destroy) { 2555 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2556 } 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2562 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2563 { 2564 PetscMPIInt rank,size; 2565 MPI_Comm comm; 2566 PetscErrorCode ierr; 2567 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2568 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2569 PetscInt *rowrange = mat->rmap->range; 2570 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2571 Mat A = aij->A,B=aij->B,C=*matredundant; 2572 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2573 PetscScalar *sbuf_a; 2574 PetscInt nzlocal=a->nz+b->nz; 2575 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2576 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2577 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2578 MatScalar *aworkA,*aworkB; 2579 PetscScalar *vals; 2580 PetscMPIInt tag1,tag2,tag3,imdex; 2581 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2582 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2583 MPI_Status recv_status,*send_status; 2584 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2585 PetscInt **rbuf_j=NULL; 2586 PetscScalar **rbuf_a=NULL; 2587 Mat_Redundant *redund =NULL; 2588 PetscContainer container; 2589 2590 PetscFunctionBegin; 2591 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2592 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2593 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2594 2595 if (reuse == MAT_REUSE_MATRIX) { 2596 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2597 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2598 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2599 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2600 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2601 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2602 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2603 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2604 2605 nsends = redund->nsends; 2606 nrecvs = redund->nrecvs; 2607 send_rank = redund->send_rank; 2608 recv_rank = redund->recv_rank; 2609 sbuf_nz = redund->sbuf_nz; 2610 rbuf_nz = redund->rbuf_nz; 2611 sbuf_j = redund->sbuf_j; 2612 sbuf_a = redund->sbuf_a; 2613 rbuf_j = redund->rbuf_j; 2614 rbuf_a = redund->rbuf_a; 2615 } 2616 2617 if (reuse == MAT_INITIAL_MATRIX) { 2618 PetscMPIInt subrank,subsize; 2619 PetscInt nleftover,np_subcomm; 2620 /* get the destination processors' id send_rank, nsends and nrecvs */ 2621 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2622 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2623 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2624 2625 np_subcomm = size/nsubcomm; 2626 nleftover = size - nsubcomm*np_subcomm; 2627 2628 nsends = 0; nrecvs = 0; 2629 for (i=0; i<size; i++) { /* i=rank*/ 2630 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2631 send_rank[nsends] = i; nsends++; 2632 recv_rank[nrecvs++] = i; 2633 } 2634 } 2635 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2636 i = size-nleftover-1; 2637 j = 0; 2638 while (j < nsubcomm - nleftover) { 2639 send_rank[nsends++] = i; 2640 i--; j++; 2641 } 2642 } 2643 2644 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2645 for (i=0; i<nleftover; i++) { 2646 recv_rank[nrecvs++] = size-nleftover+i; 2647 } 2648 } 2649 2650 /* allocate sbuf_j, sbuf_a */ 2651 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2652 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2653 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2654 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2655 2656 /* copy mat's local entries into the buffers */ 2657 if (reuse == MAT_INITIAL_MATRIX) { 2658 rownz_max = 0; 2659 rptr = sbuf_j; 2660 cols = sbuf_j + rend-rstart + 1; 2661 vals = sbuf_a; 2662 rptr[0] = 0; 2663 for (i=0; i<rend-rstart; i++) { 2664 row = i + rstart; 2665 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2666 ncols = nzA + nzB; 2667 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2668 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2669 /* load the column indices for this row into cols */ 2670 lwrite = 0; 2671 for (l=0; l<nzB; l++) { 2672 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2673 vals[lwrite] = aworkB[l]; 2674 cols[lwrite++] = ctmp; 2675 } 2676 } 2677 for (l=0; l<nzA; l++) { 2678 vals[lwrite] = aworkA[l]; 2679 cols[lwrite++] = cstart + cworkA[l]; 2680 } 2681 for (l=0; l<nzB; l++) { 2682 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2683 vals[lwrite] = aworkB[l]; 2684 cols[lwrite++] = ctmp; 2685 } 2686 } 2687 vals += ncols; 2688 cols += ncols; 2689 rptr[i+1] = rptr[i] + ncols; 2690 if (rownz_max < ncols) rownz_max = ncols; 2691 } 2692 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2693 } else { /* only copy matrix values into sbuf_a */ 2694 rptr = sbuf_j; 2695 vals = sbuf_a; 2696 rptr[0] = 0; 2697 for (i=0; 
i<rend-rstart; i++) { 2698 row = i + rstart; 2699 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2700 ncols = nzA + nzB; 2701 cworkB = b->j + b->i[i]; 2702 aworkA = a->a + a->i[i]; 2703 aworkB = b->a + b->i[i]; 2704 lwrite = 0; 2705 for (l=0; l<nzB; l++) { 2706 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2707 } 2708 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2709 for (l=0; l<nzB; l++) { 2710 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2711 } 2712 vals += ncols; 2713 rptr[i+1] = rptr[i] + ncols; 2714 } 2715 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2716 2717 /* send nzlocal to others, and recv other's nzlocal */ 2718 /*--------------------------------------------------*/ 2719 if (reuse == MAT_INITIAL_MATRIX) { 2720 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2721 2722 s_waits2 = s_waits3 + nsends; 2723 s_waits1 = s_waits2 + nsends; 2724 r_waits1 = s_waits1 + nsends; 2725 r_waits2 = r_waits1 + nrecvs; 2726 r_waits3 = r_waits2 + nrecvs; 2727 } else { 2728 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2729 2730 r_waits3 = s_waits3 + nsends; 2731 } 2732 2733 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2734 if (reuse == MAT_INITIAL_MATRIX) { 2735 /* get new tags to keep the communication clean */ 2736 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2737 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2738 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2739 2740 /* post receives of other's nzlocal */ 2741 for (i=0; i<nrecvs; i++) { 2742 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2743 } 2744 /* send nzlocal to others */ 2745 for (i=0; i<nsends; i++) { 2746 sbuf_nz[i] = nzlocal; 2747 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2748 } 2749 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2750 count = nrecvs; 2751 while (count) { 2752 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2753 2754 recv_rank[imdex] = recv_status.MPI_SOURCE; 2755 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2756 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2757 2758 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2759 2760 rbuf_nz[imdex] += i + 2; 2761 2762 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2763 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2764 count--; 2765 } 2766 /* wait on sends of nzlocal */ 2767 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2768 /* send mat->i,j to others, and recv from other's */ 2769 /*------------------------------------------------*/ 2770 for (i=0; i<nsends; i++) { 2771 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2772 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2773 } 2774 /* wait on receives of mat->i,j */ 2775 /*------------------------------*/ 2776 count = nrecvs; 2777 while (count) { 2778 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2779 if (recv_rank[imdex] != 
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2780 count--; 2781 } 2782 /* wait on sends of mat->i,j */ 2783 /*---------------------------*/ 2784 if (nsends) { 2785 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2786 } 2787 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2788 2789 /* post receives, send and receive mat->a */ 2790 /*----------------------------------------*/ 2791 for (imdex=0; imdex<nrecvs; imdex++) { 2792 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2793 } 2794 for (i=0; i<nsends; i++) { 2795 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2796 } 2797 count = nrecvs; 2798 while (count) { 2799 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2800 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2801 count--; 2802 } 2803 if (nsends) { 2804 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2805 } 2806 2807 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2808 2809 /* create redundant matrix */ 2810 /*-------------------------*/ 2811 if (reuse == MAT_INITIAL_MATRIX) { 2812 /* compute rownz_max for preallocation */ 2813 for (imdex=0; imdex<nrecvs; imdex++) { 2814 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2815 rptr = rbuf_j[imdex]; 2816 for (i=0; i<j; i++) { 2817 ncols = rptr[i+1] - rptr[i]; 2818 if (rownz_max < ncols) rownz_max = ncols; 2819 } 2820 } 2821 2822 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2823 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2824 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2825 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2826 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2827 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2828 } else { 2829 C = *matredundant; 2830 } 2831 2832 /* insert local matrix entries */ 2833 rptr = sbuf_j; 2834 cols = sbuf_j + rend-rstart + 1; 2835 vals = sbuf_a; 2836 for (i=0; i<rend-rstart; i++) { 2837 row = i + rstart; 2838 ncols = rptr[i+1] - rptr[i]; 2839 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2840 vals += ncols; 2841 cols += ncols; 2842 } 2843 /* insert received matrix entries */ 2844 for (imdex=0; imdex<nrecvs; imdex++) { 2845 rstart = rowrange[recv_rank[imdex]]; 2846 rend = rowrange[recv_rank[imdex]+1]; 2847 rptr = rbuf_j[imdex]; 2848 cols = rbuf_j[imdex] + rend-rstart + 1; 2849 vals = rbuf_a[imdex]; 2850 for (i=0; i<rend-rstart; i++) { 2851 row = i + rstart; 2852 ncols = rptr[i+1] - rptr[i]; 2853 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2854 vals += ncols; 2855 cols += ncols; 2856 } 2857 } 2858 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2859 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2860 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2861 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2862 if (reuse == MAT_INITIAL_MATRIX) { 2863 PetscContainer container; 2864 *matredundant = C; 2865 /* create a supporting struct and attach it to C for reuse */ 2866 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2867 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2868 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2869 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2870 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2871 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2872 2873 redund->nzlocal = nzlocal; 2874 redund->nsends = nsends; 2875 redund->nrecvs = nrecvs; 2876 redund->send_rank = send_rank; 2877 redund->recv_rank = recv_rank; 2878 redund->sbuf_nz = sbuf_nz; 2879 redund->rbuf_nz = rbuf_nz; 2880 redund->sbuf_j = sbuf_j; 2881 redund->sbuf_a = sbuf_a; 2882 redund->rbuf_j = rbuf_j; 2883 redund->rbuf_a = rbuf_a; 2884 2885 redund->Destroy = C->ops->destroy; 2886 C->ops->destroy = MatDestroy_MatRedundant; 2887 } 2888 PetscFunctionReturn(0); 2889 } 2890 2891 #undef __FUNCT__ 2892 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2893 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2894 { 2895 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2896 PetscErrorCode ierr; 2897 PetscInt i,*idxb = 0; 2898 PetscScalar *va,*vb; 2899 Vec vtmp; 2900 2901 PetscFunctionBegin; 2902 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2903 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2904 if (idx) { 2905 for (i=0; i<A->rmap->n; i++) { 2906 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2907 } 2908 } 2909 2910 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2911 if (idx) { 2912 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2913 } 2914 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2915 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2916 2917 for (i=0; i<A->rmap->n; i++) { 2918 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2919 va[i] = vb[i]; 2920 if (idx) idx[i] = a->garray[idxb[i]]; 2921 } 2922 } 2923 2924 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2925 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2926 ierr = PetscFree(idxb);CHKERRQ(ierr); 2927 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2928 PetscFunctionReturn(0); 2929 } 2930 2931 #undef __FUNCT__ 2932 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2933 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2934 { 2935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2936 PetscErrorCode ierr; 2937 PetscInt i,*idxb = 0; 2938 PetscScalar *va,*vb; 2939 Vec vtmp; 2940 2941 PetscFunctionBegin; 2942 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2943 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2944 if (idx) { 2945 for (i=0; i<A->cmap->n; i++) { 2946 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2947 } 2948 } 2949 2950 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2951 if (idx) { 2952 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2953 } 2954 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2955 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2956 2957 for (i=0; i<A->rmap->n; i++) { 2958 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2959 va[i] = vb[i]; 2960 if (idx) idx[i] = a->garray[idxb[i]]; 2961 } 2962 } 2963 2964 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2965 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2966 ierr = PetscFree(idxb);CHKERRQ(ierr); 2967 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 #undef __FUNCT__ 2972 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2973 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2974 { 2975 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data; 2976 PetscInt n = A->rmap->n; 2977 PetscInt cstart = A->cmap->rstart; 2978 PetscInt *cmap = mat->garray; 2979 PetscInt *diagIdx, *offdiagIdx; 2980 Vec diagV, offdiagV; 2981 PetscScalar *a, *diagA, *offdiagA; 2982 PetscInt r; 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2987 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2988 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2989 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2990 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2991 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2992 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2993 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2994 for (r = 0; r < n; ++r) { 2995 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2996 a[r] = diagA[r]; 2997 idx[r] = cstart + diagIdx[r]; 2998 } else { 2999 a[r] = offdiagA[r]; 3000 idx[r] = cmap[offdiagIdx[r]]; 3001 } 3002 } 3003 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3004 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3005 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3006 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3007 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3008 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3009 PetscFunctionReturn(0); 3010 } 3011 3012 #undef __FUNCT__ 3013 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3014 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3015 { 3016 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3017 PetscInt n = A->rmap->n; 3018 PetscInt cstart = A->cmap->rstart; 3019 PetscInt *cmap = mat->garray; 3020 PetscInt *diagIdx, *offdiagIdx; 3021 Vec diagV, offdiagV; 3022 PetscScalar *a, *diagA, *offdiagA; 3023 PetscInt r; 3024 PetscErrorCode ierr; 3025 3026 PetscFunctionBegin; 3027 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3028 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3029 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3030 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3031 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3032 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3033 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3034 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3035 for (r = 0; r < n; ++r) { 3036 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3037 a[r] = diagA[r]; 3038 idx[r] = cstart + diagIdx[r]; 3039 } else { 3040 a[r] = offdiagA[r]; 3041 idx[r] = cmap[offdiagIdx[r]]; 3042 } 3043 } 3044 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3045 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3046 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3047 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3048 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3049 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3050 PetscFunctionReturn(0); 3051 } 3052 3053 #undef __FUNCT__ 3054 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3055 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3056 { 3057 PetscErrorCode ierr; 3058 Mat *dummy; 3059 3060 PetscFunctionBegin; 3061 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3062 *newmat = *dummy; 3063 ierr = PetscFree(dummy);CHKERRQ(ierr); 3064 PetscFunctionReturn(0); 3065 } 3066 3067 extern PetscErrorCode 
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3071 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3072 { 3073 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3074 PetscErrorCode ierr; 3075 3076 PetscFunctionBegin; 3077 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3078 PetscFunctionReturn(0); 3079 } 3080 3081 #undef __FUNCT__ 3082 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3083 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3084 { 3085 PetscErrorCode ierr; 3086 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3087 3088 PetscFunctionBegin; 3089 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3090 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3091 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3092 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3093 PetscFunctionReturn(0); 3094 } 3095 3096 /* -------------------------------------------------------------------*/ 3097 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3098 MatGetRow_MPIAIJ, 3099 MatRestoreRow_MPIAIJ, 3100 MatMult_MPIAIJ, 3101 /* 4*/ MatMultAdd_MPIAIJ, 3102 MatMultTranspose_MPIAIJ, 3103 MatMultTransposeAdd_MPIAIJ, 3104 #if defined(PETSC_HAVE_PBGL) 3105 MatSolve_MPIAIJ, 3106 #else 3107 0, 3108 #endif 3109 0, 3110 0, 3111 /*10*/ 0, 3112 0, 3113 0, 3114 MatSOR_MPIAIJ, 3115 MatTranspose_MPIAIJ, 3116 /*15*/ MatGetInfo_MPIAIJ, 3117 MatEqual_MPIAIJ, 3118 MatGetDiagonal_MPIAIJ, 3119 MatDiagonalScale_MPIAIJ, 3120 MatNorm_MPIAIJ, 3121 /*20*/ MatAssemblyBegin_MPIAIJ, 3122 MatAssemblyEnd_MPIAIJ, 3123 MatSetOption_MPIAIJ, 3124 MatZeroEntries_MPIAIJ, 3125 /*24*/ MatZeroRows_MPIAIJ, 3126 0, 3127 #if defined(PETSC_HAVE_PBGL) 3128 0, 3129 #else 3130 0, 3131 #endif 3132 0, 3133 0, 3134 /*29*/ MatSetUp_MPIAIJ, 3135 #if defined(PETSC_HAVE_PBGL) 3136 0, 3137 #else 3138 0, 3139 #endif 3140 0, 3141 0, 3142 0, 3143 /*34*/ MatDuplicate_MPIAIJ, 3144 0, 3145 0, 3146 0, 3147 0, 3148 /*39*/ MatAXPY_MPIAIJ, 3149 MatGetSubMatrices_MPIAIJ, 3150 MatIncreaseOverlap_MPIAIJ, 3151 MatGetValues_MPIAIJ, 3152 MatCopy_MPIAIJ, 3153 /*44*/ MatGetRowMax_MPIAIJ, 3154 MatScale_MPIAIJ, 3155 0, 3156 0, 3157 MatZeroRowsColumns_MPIAIJ, 3158 /*49*/ MatSetRandom_MPIAIJ, 3159 0, 3160 0, 3161 0, 3162 0, 3163 /*54*/ MatFDColoringCreate_MPIAIJ, 3164 0, 3165 MatSetUnfactored_MPIAIJ, 3166 MatPermute_MPIAIJ, 3167 0, 3168 /*59*/ MatGetSubMatrix_MPIAIJ, 3169 MatDestroy_MPIAIJ, 3170 MatView_MPIAIJ, 3171 0, 3172 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3173 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3174 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3175 0, 3176 0, 3177 0, 3178 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3179 MatGetRowMinAbs_MPIAIJ, 3180 0, 3181 MatSetColoring_MPIAIJ, 3182 0, 3183 MatSetValuesAdifor_MPIAIJ, 3184 /*75*/ MatFDColoringApply_AIJ, 3185 0, 3186 0, 3187 0, 3188 MatFindZeroDiagonals_MPIAIJ, 3189 /*80*/ 0, 3190 0, 3191 0, 3192 /*83*/ MatLoad_MPIAIJ, 3193 0, 3194 0, 3195 0, 3196 0, 3197 0, 3198 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3199 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3200 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3201 MatPtAP_MPIAIJ_MPIAIJ, 3202 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3203 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3204 0, 3205 0, 3206 0, 3207 0, 3208 /*99*/ 0, 3209 0, 3210 0, 3211 MatConjugate_MPIAIJ, 3212 0, 3213 /*104*/MatSetValuesRow_MPIAIJ, 3214 MatRealPart_MPIAIJ, 3215 MatImaginaryPart_MPIAIJ, 3216 0, 3217 0, 3218 /*109*/0, 3219 MatGetRedundantMatrix_MPIAIJ, 3220 MatGetRowMin_MPIAIJ, 3221 0, 3222 0, 
3223 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3224 0, 3225 0, 3226 0, 3227 0, 3228 /*119*/0, 3229 0, 3230 0, 3231 0, 3232 MatGetMultiProcBlock_MPIAIJ, 3233 /*124*/MatFindNonzeroRows_MPIAIJ, 3234 MatGetColumnNorms_MPIAIJ, 3235 MatInvertBlockDiagonal_MPIAIJ, 3236 0, 3237 MatGetSubMatricesParallel_MPIAIJ, 3238 /*129*/0, 3239 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3240 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3241 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3242 0, 3243 /*134*/0, 3244 0, 3245 0, 3246 0, 3247 0 3248 }; 3249 3250 /* ----------------------------------------------------------------------------------------*/ 3251 3252 #undef __FUNCT__ 3253 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3254 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3255 { 3256 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3257 PetscErrorCode ierr; 3258 3259 PetscFunctionBegin; 3260 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3261 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3262 PetscFunctionReturn(0); 3263 } 3264 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3267 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3268 { 3269 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3270 PetscErrorCode ierr; 3271 3272 PetscFunctionBegin; 3273 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3274 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3275 PetscFunctionReturn(0); 3276 } 3277 3278 #undef __FUNCT__ 3279 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3280 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3281 { 3282 Mat_MPIAIJ *b; 3283 PetscErrorCode ierr; 3284 PetscInt i; 3285 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3286 3287 PetscFunctionBegin; 3288 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3289 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3290 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3291 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3292 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3293 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3294 3295 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3296 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3297 if (d_nnz) { 3298 for (i=0; i<B->rmap->n; i++) { 3299 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3300 } 3301 } 3302 if (o_nnz) { 3303 for (i=0; i<B->rmap->n; i++) { 3304 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3305 } 3306 } 3307 b = (Mat_MPIAIJ*)B->data; 3308 3309 if (!B->preallocated) { 3310 /* Explicitly create 2 MATSEQAIJ matrices. 
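       b->A will hold the "diagonal" block: the local rows restricted to the locally
       owned columns (B->rmap->n by B->cmap->n). b->B will hold the "off-diagonal"
       block: the remaining entries of the local rows. b->B is created below with the
       full global column width B->cmap->N; during assembly (MatSetUpMultiply_MPIAIJ)
       its column space is compressed to just the columns actually referenced, and the
       compressed-to-global column map is kept in b->garray.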
*/ 3311 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3312 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3313 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3314 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3315 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3316 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3317 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3318 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3319 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3320 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3321 } 3322 3323 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3324 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3325 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3326 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3327 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3328 B->preallocated = PETSC_TRUE; 3329 PetscFunctionReturn(0); 3330 } 3331 3332 #undef __FUNCT__ 3333 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3334 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3335 { 3336 Mat mat; 3337 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3338 PetscErrorCode ierr; 3339 3340 PetscFunctionBegin; 3341 *newmat = 0; 3342 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3343 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3344 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3345 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3346 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3347 a = (Mat_MPIAIJ*)mat->data; 3348 3349 mat->factortype = matin->factortype; 3350 mat->rmap->bs = matin->rmap->bs; 3351 mat->cmap->bs = matin->cmap->bs; 3352 mat->assembled = PETSC_TRUE; 3353 mat->insertmode = NOT_SET_VALUES; 3354 mat->preallocated = PETSC_TRUE; 3355 3356 a->size = oldmat->size; 3357 a->rank = oldmat->rank; 3358 a->donotstash = oldmat->donotstash; 3359 a->roworiented = oldmat->roworiented; 3360 a->rowindices = 0; 3361 a->rowvalues = 0; 3362 a->getrowactive = PETSC_FALSE; 3363 3364 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3365 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3366 3367 if (oldmat->colmap) { 3368 #if defined(PETSC_USE_CTABLE) 3369 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3370 #else 3371 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3372 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3373 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3374 #endif 3375 } else a->colmap = 0; 3376 if (oldmat->garray) { 3377 PetscInt len; 3378 len = oldmat->B->cmap->n; 3379 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3380 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3381 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3382 } else a->garray = 0; 3383 3384 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3385 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3386 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3387 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3388 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3389 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3390 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3391 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3392 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3393 *newmat = mat; 3394 PetscFunctionReturn(0); 3395 } 3396 3397 3398 3399 #undef __FUNCT__ 3400 #define __FUNCT__ "MatLoad_MPIAIJ" 3401 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3402 { 3403 PetscScalar *vals,*svals; 3404 MPI_Comm comm; 3405 PetscErrorCode ierr; 3406 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3407 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3408 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3409 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3410 PetscInt cend,cstart,n,*rowners,sizesset=1; 3411 int fd; 3412 PetscInt bs = 1; 3413 3414 PetscFunctionBegin; 3415 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3416 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3417 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3418 if (!rank) { 3419 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3420 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3421 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3422 } 3423 3424 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3425 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3426 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3427 3428 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3429 3430 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3431 M = header[1]; N = header[2]; 3432 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3433 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3434 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3435 3436 /* If global sizes are set, check if they are consistent with that given in the file */ 3437 if (sizesset) { 3438 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3439 } 3440 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3441 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3442 3443 /* determine ownership of all (block) rows */ 3444 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3445 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3446 else m = newMat->rmap->n; /* Set by user */ 3447 3448 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3449 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3450 3451 /* First process needs enough room for process with most rows */ 3452 if (!rank) { 3453 mmax = rowners[1]; 3454 for (i=2; 
i<=size; i++) { 3455 mmax = PetscMax(mmax, rowners[i]); 3456 } 3457 } else mmax = m; 3458 3459 rowners[0] = 0; 3460 for (i=2; i<=size; i++) { 3461 rowners[i] += rowners[i-1]; 3462 } 3463 rstart = rowners[rank]; 3464 rend = rowners[rank+1]; 3465 3466 /* distribute row lengths to all processors */ 3467 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3468 if (!rank) { 3469 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3470 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3471 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3472 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3473 for (j=0; j<m; j++) { 3474 procsnz[0] += ourlens[j]; 3475 } 3476 for (i=1; i<size; i++) { 3477 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3478 /* calculate the number of nonzeros on each processor */ 3479 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3480 procsnz[i] += rowlengths[j]; 3481 } 3482 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3483 } 3484 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3485 } else { 3486 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3487 } 3488 3489 if (!rank) { 3490 /* determine max buffer needed and allocate it */ 3491 maxnz = 0; 3492 for (i=0; i<size; i++) { 3493 maxnz = PetscMax(maxnz,procsnz[i]); 3494 } 3495 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3496 3497 /* read in my part of the matrix column indices */ 3498 nz = procsnz[0]; 3499 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3500 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3501 3502 /* read in every one elses and ship off */ 3503 for (i=1; i<size; i++) { 3504 nz = procsnz[i]; 3505 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3506 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3507 } 3508 ierr = PetscFree(cols);CHKERRQ(ierr); 3509 } else { 3510 /* determine buffer space needed for message */ 3511 nz = 0; 3512 for (i=0; i<m; i++) { 3513 nz += ourlens[i]; 3514 } 3515 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3516 3517 /* receive message of column indices*/ 3518 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3519 } 3520 3521 /* determine column ownership if matrix is not square */ 3522 if (N != M) { 3523 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3524 else n = newMat->cmap->n; 3525 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3526 cstart = cend - n; 3527 } else { 3528 cstart = rstart; 3529 cend = rend; 3530 n = cend - cstart; 3531 } 3532 3533 /* loop over local rows, determining number of off diagonal entries */ 3534 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3535 jj = 0; 3536 for (i=0; i<m; i++) { 3537 for (j=0; j<ourlens[i]; j++) { 3538 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3539 jj++; 3540 } 3541 } 3542 3543 for (i=0; i<m; i++) { 3544 ourlens[i] -= offlens[i]; 3545 } 3546 if (!sizesset) { 3547 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3548 } 3549 3550 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3551 3552 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3553 3554 for (i=0; i<m; i++) { 3555 ourlens[i] += offlens[i]; 3556 } 3557 3558 if (!rank) { 3559 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3560 3561 /* read in my part of the matrix 
numerical values */ 3562 nz = procsnz[0]; 3563 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3564 3565 /* insert into matrix */ 3566 jj = rstart; 3567 smycols = mycols; 3568 svals = vals; 3569 for (i=0; i<m; i++) { 3570 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3571 smycols += ourlens[i]; 3572 svals += ourlens[i]; 3573 jj++; 3574 } 3575 3576 /* read in other processors and ship out */ 3577 for (i=1; i<size; i++) { 3578 nz = procsnz[i]; 3579 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3580 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3581 } 3582 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3583 } else { 3584 /* receive numeric values */ 3585 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3586 3587 /* receive message of values*/ 3588 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3589 3590 /* insert into matrix */ 3591 jj = rstart; 3592 smycols = mycols; 3593 svals = vals; 3594 for (i=0; i<m; i++) { 3595 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3596 smycols += ourlens[i]; 3597 svals += ourlens[i]; 3598 jj++; 3599 } 3600 } 3601 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3602 ierr = PetscFree(vals);CHKERRQ(ierr); 3603 ierr = PetscFree(mycols);CHKERRQ(ierr); 3604 ierr = PetscFree(rowners);CHKERRQ(ierr); 3605 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3606 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3607 PetscFunctionReturn(0); 3608 } 3609 3610 #undef __FUNCT__ 3611 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3612 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3613 { 3614 PetscErrorCode ierr; 3615 IS iscol_local; 3616 PetscInt csize; 3617 3618 PetscFunctionBegin; 3619 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3620 if (call == MAT_REUSE_MATRIX) { 3621 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3622 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3623 } else { 3624 PetscInt cbs; 3625 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3626 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3627 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3628 } 3629 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3630 if (call == MAT_INITIAL_MATRIX) { 3631 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3632 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3633 } 3634 PetscFunctionReturn(0); 3635 } 3636 3637 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3638 #undef __FUNCT__ 3639 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3640 /* 3641 Not great since it makes two copies of the submatrix, first an SeqAIJ 3642 in local and then by concatenating the local matrices the end result. 3643 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3644 3645 Note: This requires a sequential iscol with all indices. 
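    A typical call path, seen from the caller's side (the index set names below are
    illustrative only, not taken from this file):
      IS  rows,cols;
      Mat sub = NULL;
      MatGetSubMatrix(mat,rows,cols,MAT_INITIAL_MATRIX,&sub);   - MatGetSubMatrix_MPIAIJ() gathers iscol onto every process, then calls this routine
      MatGetSubMatrix(mat,rows,cols,MAT_REUSE_MATRIX,&sub);     - reuses the "SubMatrix" object composed on sub by the first call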
3646 */ 3647 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3648 { 3649 PetscErrorCode ierr; 3650 PetscMPIInt rank,size; 3651 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3652 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3653 PetscBool allcolumns, colflag; 3654 Mat M,Mreuse; 3655 MatScalar *vwork,*aa; 3656 MPI_Comm comm; 3657 Mat_SeqAIJ *aij; 3658 3659 PetscFunctionBegin; 3660 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3661 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3662 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3663 3664 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3665 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3666 if (colflag && ncol == mat->cmap->N) { 3667 allcolumns = PETSC_TRUE; 3668 } else { 3669 allcolumns = PETSC_FALSE; 3670 } 3671 if (call == MAT_REUSE_MATRIX) { 3672 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3673 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3674 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3675 } else { 3676 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3677 } 3678 3679 /* 3680 m - number of local rows 3681 n - number of columns (same on all processors) 3682 rstart - first row in new global matrix generated 3683 */ 3684 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3685 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3686 if (call == MAT_INITIAL_MATRIX) { 3687 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3688 ii = aij->i; 3689 jj = aij->j; 3690 3691 /* 3692 Determine the number of non-zeros in the diagonal and off-diagonal 3693 portions of the matrix in order to do correct preallocation 3694 */ 3695 3696 /* first get start and end of "diagonal" columns */ 3697 if (csize == PETSC_DECIDE) { 3698 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3699 if (mglobal == n) { /* square matrix */ 3700 nlocal = m; 3701 } else { 3702 nlocal = n/size + ((n % size) > rank); 3703 } 3704 } else { 3705 nlocal = csize; 3706 } 3707 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3708 rstart = rend - nlocal; 3709 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3710 3711 /* next, compute all the lengths */ 3712 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3713 olens = dlens + m; 3714 for (i=0; i<m; i++) { 3715 jend = ii[i+1] - ii[i]; 3716 olen = 0; 3717 dlen = 0; 3718 for (j=0; j<jend; j++) { 3719 if (*jj < rstart || *jj >= rend) olen++; 3720 else dlen++; 3721 jj++; 3722 } 3723 olens[i] = olen; 3724 dlens[i] = dlen; 3725 } 3726 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3727 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3728 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3729 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3730 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3731 ierr = PetscFree(dlens);CHKERRQ(ierr); 3732 } else { 3733 PetscInt ml,nl; 3734 3735 M = *newmat; 3736 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3737 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3738 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3739 /* 
3740 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3741 rather than the slower MatSetValues().
3742 */
3743 M->was_assembled = PETSC_TRUE;
3744 M->assembled = PETSC_FALSE;
3745 }
3746 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3747 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3748 ii = aij->i;
3749 jj = aij->j;
3750 aa = aij->a;
3751 for (i=0; i<m; i++) {
3752 row = rstart + i;
3753 nz = ii[i+1] - ii[i];
3754 cwork = jj; jj += nz;
3755 vwork = aa; aa += nz;
3756 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3757 }
3758
3759 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3760 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3761 *newmat = M;
3762
3763 /* save submatrix used in processor for next request */
3764 if (call == MAT_INITIAL_MATRIX) {
3765 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3766 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3767 }
3768 PetscFunctionReturn(0);
3769 }
3770
3771 #undef __FUNCT__
3772 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3773 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3774 {
3775 PetscInt m,cstart, cend,j,nnz,i,d;
3776 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3777 const PetscInt *JJ;
3778 PetscScalar *values;
3779 PetscErrorCode ierr;
3780
3781 PetscFunctionBegin;
3782 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3783
3784 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3785 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3786 m = B->rmap->n;
3787 cstart = B->cmap->rstart;
3788 cend = B->cmap->rend;
3789 rstart = B->rmap->rstart;
3790
3791 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3792
3793 #if defined(PETSC_USE_DEBUG)
3794 for (i=0; i<m; i++) {
3795 nnz = Ii[i+1]- Ii[i];
3796 JJ = J + Ii[i];
3797 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3798 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3799 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3800 }
3801 #endif
3802
3803 for (i=0; i<m; i++) {
3804 nnz = Ii[i+1]- Ii[i];
3805 JJ = J + Ii[i];
3806 nnz_max = PetscMax(nnz_max,nnz);
3807 d = 0;
3808 for (j=0; j<nnz; j++) {
3809 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3810 }
3811 d_nnz[i] = d;
3812 o_nnz[i] = nnz - d;
3813 }
3814 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3815 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3816
3817 if (v) values = (PetscScalar*)v;
3818 else {
3819 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3820 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3821 }
3822
3823 for (i=0; i<m; i++) {
3824 ii = i + rstart;
3825 nnz = Ii[i+1]- Ii[i];
3826 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3827 }
3828 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3829 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830
3831 if (!v) {
3832 ierr = PetscFree(values);CHKERRQ(ierr);
3833 }
3834 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3835 PetscFunctionReturn(0);
3836 }
3837
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3840 /*@
3841 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3842 (the default parallel PETSc format).
3843
3844 Collective on MPI_Comm
3845
3846 Input Parameters:
3847 + B - the matrix
3848 . i - the indices into j for the start of each local row (starts with zero)
3849 . j - the column indices for each local row (starts with zero)
3850 - v - optional values in the matrix
3851
3852 Level: developer
3853
3854 Notes:
3855 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3856 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3857 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3858
3859 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3860
3861 The format used for the sparse matrix input is equivalent to a
3862 row-major ordering, i.e., for the following matrix, the input data expected is
3863 as shown:
3864
3865 1 0 0
3866 2 0 3 P0
3867 -------
3868 4 5 6 P1
3869
3870 Process0 [P0]: rows_owned=[0,1]
3871 i = {0,1,3} [size = nrow+1 = 2+1]
3872 j = {0,0,2} [size = nz = 3]
3873 v = {1,2,3} [size = nz = 3]
3874
3875 Process1 [P1]: rows_owned=[2]
3876 i = {0,3} [size = nrow+1 = 1+1]
3877 j = {0,1,2} [size = nz = 3]
3878 v = {4,5,6} [size = nz = 3]
3879
3880 .keywords: matrix, aij, compressed row, sparse, parallel
3881
3882 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3883 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3884 @*/
3885 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3886 {
3887 PetscErrorCode ierr;
3888
3889 PetscFunctionBegin;
3890 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3891 PetscFunctionReturn(0);
3892 }
3893
3894 #undef __FUNCT__
3895 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3896 /*@C
3897 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3898 (the default parallel PETSc format). For good matrix assembly performance
3899 the user should preallocate the matrix storage by setting the parameters
3900 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3901 performance can be increased by more than a factor of 50.
3902
3903 Collective on MPI_Comm
3904
3905 Input Parameters:
3906 + A - the matrix
3907 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3908 (same value is used for all local rows)
3909 . d_nnz - array containing the number of nonzeros in the various rows of the
3910 DIAGONAL portion of the local submatrix (possibly different for each row)
3911 or NULL, if d_nz is used to specify the nonzero structure.
3912 The size of this array is equal to the number of local rows, i.e., 'm'.
3913 For matrices that will be factored, you must leave room for (and set)
3914 the diagonal entry even if it is zero.
3915 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3916 submatrix (same value is used for all local rows).
3917 - o_nnz - array containing the number of nonzeros in the various rows of the
3918 OFF-DIAGONAL portion of the local submatrix (possibly different for
3919 each row) or NULL, if o_nz is used to specify the nonzero
3920 structure. The size of this array is equal to the number
3921 of local rows, i.e., 'm'.
3922
3923 If the *_nnz parameter is given then the *_nz parameter is ignored.
3924
3925 The AIJ format (also called the Yale sparse matrix format or
3926 compressed row storage (CSR)), is fully compatible with standard Fortran 77
3927 storage. The stored row and column indices begin with zero.
3928 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3929
3930 The parallel matrix is partitioned such that the first m0 rows belong to
3931 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3932 to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3933
3934 The DIAGONAL portion of the local submatrix of a processor can be defined
3935 as the submatrix which is obtained by extracting the part corresponding to
3936 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3937 first row that belongs to the processor, r2 is the last row belonging to
3938 this processor, and c1-c2 is the range of indices of the local part of a
3939 vector suitable for applying the matrix to. This is an mxn matrix. In the
3940 common case of a square matrix, the row and column ranges are the same and
3941 the DIAGONAL part is also square. The remaining portion of the local
3942 submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3943
3944 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3945
3946 You can call MatGetInfo() to get information on how effective the preallocation was;
3947 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded.
3948 You can also run with the option -info and look for messages with the string
3949 malloc in them to see if additional memory allocation was needed.
3950
3951 Example usage:
3952
3953 Consider the following 8x8 matrix with 34 non-zero values that is
3954 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3955 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3956 as follows:
3957
3958 .vb
3959 1 2 0 | 0 3 0 | 0 4
3960 Proc0 0 5 6 | 7 0 0 | 8 0
3961 9 0 10 | 11 0 0 | 12 0
3962 -------------------------------------
3963 13 0 14 | 15 16 17 | 0 0
3964 Proc1 0 18 0 | 19 20 21 | 0 0
3965 0 0 0 | 22 23 0 | 24 0
3966 -------------------------------------
3967 Proc2 25 26 27 | 0 0 28 | 29 0
3968 30 0 0 | 31 32 33 | 0 34
3969 .ve
3970
3971 This can be represented as a collection of submatrices as:
3972
3973 .vb
3974 A B C
3975 D E F
3976 G H I
3977 .ve
3978
3979 Where the submatrices A,B,C are owned by proc0, D,E,F are
3980 owned by proc1, G,H,I are owned by proc2.
3981
3982 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3983 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3984 The 'M','N' parameters are 8,8, and have the same values on all procs.
3985
3986 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3987 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices
3988 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3989 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3990 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3991 matrix, and [DF] as another SeqAIJ matrix.
3992
3993 When d_nz, o_nz parameters are specified, d_nz storage elements are
3994 allocated for every row of the local diagonal submatrix, and o_nz
3995 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3996 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3997 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3998 In this case, the values of d_nz,o_nz are:
3999 .vb
4000 proc0 : d_nz = 2, o_nz = 2
4001 proc1 : d_nz = 3, o_nz = 2
4002 proc2 : d_nz = 1, o_nz = 4
4003 .ve
4004 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4005 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4006 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4007 34 values.
4008
4009 When d_nnz, o_nnz parameters are specified, the storage is specified
4010 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4011 In the above case the values for d_nnz,o_nnz are:
4012 .vb
4013 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4014 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4015 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4016 .ve
4017 Here the space allocated is the sum of all the above values, i.e., 34, and
4018 hence pre-allocation is perfect.
4019
4020 Level: intermediate
4021
4022 .keywords: matrix, aij, compressed row, sparse, parallel
4023
4024 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4025 MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4026 @*/
4027 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4028 {
4029 PetscErrorCode ierr;
4030
4031 PetscFunctionBegin;
4032 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4033 PetscValidType(B,1);
4034 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4035 PetscFunctionReturn(0);
4036 }
4037
4038 #undef __FUNCT__
4039 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4040 /*@
4041 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
4042 local rows in standard CSR format.
4043
4044 Collective on MPI_Comm
4045
4046 Input Parameters:
4047 + comm - MPI communicator
4048 . m - number of local rows (Cannot be PETSC_DECIDE)
4049 . n - This value should be the same as the local size used in creating the
4050 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4051 calculated if N is given) For square matrices n is almost always m.
4052 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4053 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4054 . i - row indices
4055 . j - column indices
4056 - a - matrix values
4057
4058 Output Parameter:
4059 . mat - the matrix
4060
4061 Level: intermediate
4062
4063 Notes:
4064 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4065 thus you CANNOT change the matrix entries by changing the values of a[] after you have
4066 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
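
       A minimal calling sketch (the array contents below are those process 0 of the
       example further down would pass; process 1 proceeds analogously with its single
       row, and error checking is omitted):
.vb
       PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
       PetscScalar a[] = {1.0,2.0,3.0};
       Mat         A;
       MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve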
4067
4068 The i and j indices are 0 based, and the i indices are offsets into the local j array.
4069
4070 The format used for the sparse matrix input is equivalent to a
4071 row-major ordering, i.e., for the following matrix, the input data expected is
4072 as shown:
4073
4074 1 0 0
4075 2 0 3 P0
4076 -------
4077 4 5 6 P1
4078
4079 Process0 [P0]: rows_owned=[0,1]
4080 i = {0,1,3} [size = nrow+1 = 2+1]
4081 j = {0,0,2} [size = nz = 3]
4082 v = {1,2,3} [size = nz = 3]
4083
4084 Process1 [P1]: rows_owned=[2]
4085 i = {0,3} [size = nrow+1 = 1+1]
4086 j = {0,1,2} [size = nz = 3]
4087 v = {4,5,6} [size = nz = 3]
4088
4089 .keywords: matrix, aij, compressed row, sparse, parallel
4090
4091 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4092 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4093 @*/
4094 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4095 {
4096 PetscErrorCode ierr;
4097
4098 PetscFunctionBegin;
4099 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4100 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4101 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4102 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4103 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4104 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4105 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4106 PetscFunctionReturn(0);
4107 }
4108
4109 #undef __FUNCT__
4110 #define __FUNCT__ "MatCreateAIJ"
4111 /*@C
4112 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4113 (the default parallel PETSc format). For good matrix assembly performance
4114 the user should preallocate the matrix storage by setting the parameters
4115 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4116 performance can be increased by more than a factor of 50.
4117
4118 Collective on MPI_Comm
4119
4120 Input Parameters:
4121 + comm - MPI communicator
4122 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4123 This value should be the same as the local size used in creating the
4124 y vector for the matrix-vector product y = Ax.
4125 . n - This value should be the same as the local size used in creating the
4126 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4127 calculated if N is given) For square matrices n is almost always m.
4128 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4129 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4130 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4131 (same value is used for all local rows)
4132 . d_nnz - array containing the number of nonzeros in the various rows of the
4133 DIAGONAL portion of the local submatrix (possibly different for each row)
4134 or NULL, if d_nz is used to specify the nonzero structure.
4135 The size of this array is equal to the number of local rows, i.e., 'm'.
4136 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4137 submatrix (same value is used for all local rows).
4138 - o_nnz - array containing the number of nonzeros in the various rows of the
4139 OFF-DIAGONAL portion of the local submatrix (possibly different for
4140 each row) or NULL, if o_nz is used to specify the nonzero
4141 structure. The size of this array is equal to the number
4142 of local rows, i.e., 'm'.
4143
4144 Output Parameter:
4145 . A - the matrix
4146
4147 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4148 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4149 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4150
4151 Notes:
4152 If the *_nnz parameter is given then the *_nz parameter is ignored.
4153
4154 m,n,M,N parameters specify the size of the matrix, and its partitioning across
4155 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4156 storage requirements for this matrix.
4157
4158 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4159 processor then it must be used on all processors that share the object for
4160 that argument.
4161
4162 The user MUST specify either the local or global matrix dimensions
4163 (possibly both).
4164
4165 The parallel matrix is partitioned across processors such that the
4166 first m0 rows belong to process 0, the next m1 rows belong to
4167 process 1, the next m2 rows belong to process 2 etc., where
4168 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4169 values corresponding to an [m x N] submatrix.
4170
4171 The columns are logically partitioned with the n0 columns belonging
4172 to 0th partition, the next n1 columns belonging to the next
4173 partition etc., where n0,n1,n2... are the input parameter 'n'.
4174
4175 The DIAGONAL portion of the local submatrix on any given processor
4176 is the submatrix corresponding to the rows and columns m,n
4177 corresponding to the given processor, i.e., the diagonal matrix on
4178 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4179 etc. The remaining portion of the local submatrix [m x (N-n)]
4180 constitutes the OFF-DIAGONAL portion. The example below better
4181 illustrates this concept.
4182
4183 For a square global matrix we define each processor's diagonal portion
4184 to be its local rows and the corresponding columns (a square submatrix);
4185 each processor's off-diagonal portion encompasses the remainder of the
4186 local matrix (a rectangular submatrix).
4187
4188 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4189
4190 When calling this routine with a single process communicator, a matrix of
4191 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4192 type of communicator, use the construction mechanism:
4193 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4194
4195 By default, this format uses inodes (identical nodes) when possible.
4196 We search for consecutive rows with the same nonzero structure, thereby
4197 reusing matrix information to achieve increased efficiency.
4198
4199 Options Database Keys:
4200 + -mat_no_inode - Do not use inodes
4201 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4202 - -mat_aij_oneindex - Internally use indexing starting at 1
4203 rather than 0. Note that when calling MatSetValues(),
4204 the user still MUST index entries starting at 0!
4205
4206
4207 Example usage:
4208
4209 Consider the following 8x8 matrix with 34 non-zero values that is
4210 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4211 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4212 as follows:
4213
4214 .vb
4215 1 2 0 | 0 3 0 | 0 4
4216 Proc0 0 5 6 | 7 0 0 | 8 0
4217 9 0 10 | 11 0 0 | 12 0
4218 -------------------------------------
4219 13 0 14 | 15 16 17 | 0 0
4220 Proc1 0 18 0 | 19 20 21 | 0 0
4221 0 0 0 | 22 23 0 | 24 0
4222 -------------------------------------
4223 Proc2 25 26 27 | 0 0 28 | 29 0
4224 30 0 0 | 31 32 33 | 0 34
4225 .ve
4226
4227 This can be represented as a collection of submatrices as:
4228
4229 .vb
4230 A B C
4231 D E F
4232 G H I
4233 .ve
4234
4235 Where the submatrices A,B,C are owned by proc0, D,E,F are
4236 owned by proc1, G,H,I are owned by proc2.
4237
4238 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4239 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4240 The 'M','N' parameters are 8,8, and have the same values on all procs.
4241
4242 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4243 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4244 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4245 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4246 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4247 matrix, and [DF] as another SeqAIJ matrix.
4248
4249 When d_nz, o_nz parameters are specified, d_nz storage elements are
4250 allocated for every row of the local diagonal submatrix, and o_nz
4251 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4252 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4253 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4254 In this case, the values of d_nz,o_nz are:
4255 .vb
4256 proc0 : d_nz = 2, o_nz = 2
4257 proc1 : d_nz = 3, o_nz = 2
4258 proc2 : d_nz = 1, o_nz = 4
4259 .ve
4260 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4261 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4262 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4263 34 values.
4264
4265 When d_nnz, o_nnz parameters are specified, the storage is specified
4266 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4267 In the above case the values for d_nnz,o_nnz are:
4268 .vb
4269 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4270 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4271 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4272 .ve
4273 Here the space allocated is the sum of all the above values, i.e., 34, and
4274 hence pre-allocation is perfect.
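
   A calling sketch for the layout above, as process 0 would issue it (the other
   processes pass their own local sizes and nnz arrays; variable names are illustrative
   and error checking is omitted):
.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   followed by the usual MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() calls.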
4275 4276 Level: intermediate 4277 4278 .keywords: matrix, aij, compressed row, sparse, parallel 4279 4280 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4281 MPIAIJ, MatCreateMPIAIJWithArrays() 4282 @*/ 4283 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4284 { 4285 PetscErrorCode ierr; 4286 PetscMPIInt size; 4287 4288 PetscFunctionBegin; 4289 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4290 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4291 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4292 if (size > 1) { 4293 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4294 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4295 } else { 4296 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4297 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4298 } 4299 PetscFunctionReturn(0); 4300 } 4301 4302 #undef __FUNCT__ 4303 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4304 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4305 { 4306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4307 4308 PetscFunctionBegin; 4309 *Ad = a->A; 4310 *Ao = a->B; 4311 *colmap = a->garray; 4312 PetscFunctionReturn(0); 4313 } 4314 4315 #undef __FUNCT__ 4316 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4317 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4318 { 4319 PetscErrorCode ierr; 4320 PetscInt i; 4321 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4322 4323 PetscFunctionBegin; 4324 if (coloring->ctype == IS_COLORING_GLOBAL) { 4325 ISColoringValue *allcolors,*colors; 4326 ISColoring ocoloring; 4327 4328 /* set coloring for diagonal portion */ 4329 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4330 4331 /* set coloring for off-diagonal portion */ 4332 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4333 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4334 for (i=0; i<a->B->cmap->n; i++) { 4335 colors[i] = allcolors[a->garray[i]]; 4336 } 4337 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4338 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4339 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4340 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4341 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4342 ISColoringValue *colors; 4343 PetscInt *larray; 4344 ISColoring ocoloring; 4345 4346 /* set coloring for diagonal portion */ 4347 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4348 for (i=0; i<a->A->cmap->n; i++) { 4349 larray[i] = i + A->cmap->rstart; 4350 } 4351 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4352 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4353 for (i=0; i<a->A->cmap->n; i++) { 4354 colors[i] = coloring->colors[larray[i]]; 4355 } 4356 ierr = PetscFree(larray);CHKERRQ(ierr); 4357 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4358 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4359 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4360 4361 /* set coloring for off-diagonal portion */ 4362 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4363 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4364 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4365 for (i=0; i<a->B->cmap->n; i++) { 4366 colors[i] = coloring->colors[larray[i]]; 4367 } 4368 ierr = PetscFree(larray);CHKERRQ(ierr); 4369 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4370 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4371 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4372 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4373 PetscFunctionReturn(0); 4374 } 4375 4376 #undef __FUNCT__ 4377 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4378 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4379 { 4380 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4381 PetscErrorCode ierr; 4382 4383 PetscFunctionBegin; 4384 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4385 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4386 PetscFunctionReturn(0); 4387 } 4388 4389 #undef __FUNCT__ 4390 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4391 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4392 { 4393 PetscErrorCode ierr; 4394 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4395 PetscInt *indx; 4396 4397 PetscFunctionBegin; 4398 /* This routine will ONLY return MPIAIJ type matrix */ 4399 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4400 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4401 if (n == PETSC_DECIDE) { 4402 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4403 } 4404 /* Check sum(n) = N */ 4405 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4406 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4407 4408 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4409 rstart -= m; 4410 4411 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4412 for (i=0; i<m; i++) { 4413 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4414 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4415 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4416 } 4417 4418 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4419 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4420 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4421 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4422 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4423 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4424 PetscFunctionReturn(0); 4425 } 4426 4427 #undef __FUNCT__ 4428 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4429 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4430 { 4431 PetscErrorCode ierr; 4432 PetscInt m,N,i,rstart,nnz,Ii; 4433 PetscInt *indx; 4434 PetscScalar *values; 4435 4436 PetscFunctionBegin; 4437 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4438 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4439 for (i=0; i<m; i++) { 4440 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4441 Ii = i + rstart; 4442 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4443 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
/*@
      MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH processor MUST be the same.

@*/
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
*/ 4504 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4505 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4506 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4507 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4508 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4509 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4510 for (i=0; i<m; i++) { 4511 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4512 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4513 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4514 } 4515 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4516 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4517 4518 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4519 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4520 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4521 sprintf(name,"%s.%d",outfile,rank); 4522 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4523 ierr = PetscFree(name);CHKERRQ(ierr); 4524 ierr = MatView(B,out);CHKERRQ(ierr); 4525 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4526 ierr = MatDestroy(&B);CHKERRQ(ierr); 4527 PetscFunctionReturn(0); 4528 } 4529 4530 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4531 #undef __FUNCT__ 4532 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4533 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4534 { 4535 PetscErrorCode ierr; 4536 Mat_Merge_SeqsToMPI *merge; 4537 PetscContainer container; 4538 4539 PetscFunctionBegin; 4540 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4541 if (container) { 4542 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4543 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4544 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4545 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4555 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4556 ierr = PetscFree(merge);CHKERRQ(ierr); 4557 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4558 } 4559 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4560 PetscFunctionReturn(0); 4561 } 4562 4563 #include <../src/mat/utils/freespace.h> 4564 #include <petscbt.h> 4565 4566 #undef __FUNCT__ 4567 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4568 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4569 { 4570 PetscErrorCode ierr; 4571 MPI_Comm comm; 4572 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4573 PetscMPIInt size,rank,taga,*len_s; 4574 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4575 PetscInt proc,m; 4576 PetscInt **buf_ri,**buf_rj; 4577 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4578 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4579 MPI_Request *s_waits,*r_waits; 4580 MPI_Status *status; 4581 MatScalar *aa=a->a; 4582 MatScalar **abuf_r,*ba_i; 4583 Mat_Merge_SeqsToMPI *merge; 4584 PetscContainer container; 4585 4586 PetscFunctionBegin; 4587 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4588 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4589 4590 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4591 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4592 4593 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4594 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4595 4596 bi = merge->bi; 4597 bj = merge->bj; 4598 buf_ri = merge->buf_ri; 4599 buf_rj = merge->buf_rj; 4600 4601 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4602 owners = merge->rowmap->range; 4603 len_s = merge->len_s; 4604 4605 /* send and recv matrix values */ 4606 /*-----------------------------*/ 4607 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4608 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4609 4610 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4611 for (proc=0,k=0; proc<size; proc++) { 4612 if (!len_s[proc]) continue; 4613 i = owners[proc]; 4614 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4615 k++; 4616 } 4617 4618 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4619 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4620 ierr = PetscFree(status);CHKERRQ(ierr); 4621 4622 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4623 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4624 4625 /* insert mat values of mpimat */ 4626 /*----------------------------*/ 4627 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4628 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4629 4630 for (k=0; k<merge->nrecv; k++) { 4631 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4632 nrows = *(buf_ri_k[k]); 4633 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4634 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4635 } 4636 4637 /* set values of ba */ 4638 m = merge->rowmap->n; 4639 for (i=0; i<m; i++) { 4640 arow = owners[rank] + i; 4641 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4642 bnzi = bi[i+1] - bi[i]; 4643 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4644 4645 /* add local non-zero vals of this proc's seqmat into ba */ 4646 anzi = ai[arow+1] - ai[arow]; 4647 aj = a->j + ai[arow]; 4648 aa = a->a + ai[arow]; 4649 nextaj = 0; 4650 for (j=0; nextaj<anzi; j++) { 4651 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4652 ba_i[j] += aa[nextaj++]; 4653 } 4654 } 4655 4656 /* add received vals into ba */ 4657 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4658 /* i-th row */ 4659 if (i == *nextrow[k]) { 4660 anzi = *(nextai[k]+1) - *nextai[k]; 4661 aj = buf_rj[k] + *(nextai[k]); 4662 aa = abuf_r[k] + *(nextai[k]); 4663 nextaj = 0; 4664 for (j=0; nextaj<anzi; j++) { 4665 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4666 ba_i[j] += aa[nextaj++]; 4667 } 4668 } 4669 nextrow[k]++; nextai[k]++; 4670 } 4671 } 4672 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4673 } 4674 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4675 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4676 4677 ierr = 
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4678 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4679 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4680 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4681 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4682 PetscFunctionReturn(0); 4683 } 4684 4685 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4686 4687 #undef __FUNCT__ 4688 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4689 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4690 { 4691 PetscErrorCode ierr; 4692 Mat B_mpi; 4693 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4694 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4695 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4696 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4697 PetscInt len,proc,*dnz,*onz,bs,cbs; 4698 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4699 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4700 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4701 MPI_Status *status; 4702 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4703 PetscBT lnkbt; 4704 Mat_Merge_SeqsToMPI *merge; 4705 PetscContainer container; 4706 4707 PetscFunctionBegin; 4708 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4709 4710 /* make sure it is a PETSc comm */ 4711 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4712 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4713 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4714 4715 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4716 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4717 4718 /* determine row ownership */ 4719 /*---------------------------------------------------------*/ 4720 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4721 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4722 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4723 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4724 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4725 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4726 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4727 4728 m = merge->rowmap->n; 4729 owners = merge->rowmap->range; 4730 4731 /* determine the number of messages to send, their lengths */ 4732 /*---------------------------------------------------------*/ 4733 len_s = merge->len_s; 4734 4735 len = 0; /* length of buf_si[] */ 4736 merge->nsend = 0; 4737 for (proc=0; proc<size; proc++) { 4738 len_si[proc] = 0; 4739 if (proc == rank) { 4740 len_s[proc] = 0; 4741 } else { 4742 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4743 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4744 } 4745 if (len_s[proc]) { 4746 merge->nsend++; 4747 nrows = 0; 4748 for (i=owners[proc]; i<owners[proc+1]; i++) { 4749 if (ai[i+1] > ai[i]) nrows++; 4750 } 4751 len_si[proc] = 2*(nrows+1); 4752 len += len_si[proc]; 4753 } 4754 } 4755 4756 /* determine the number and length of messages to receive for ij-structure */ 4757 /*-------------------------------------------------------------------------*/ 4758 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4759 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4760 4761 /* post the Irecv of j-structure */ 4762 
/*-------------------------------*/ 4763 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4764 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4765 4766 /* post the Isend of j-structure */ 4767 /*--------------------------------*/ 4768 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4769 4770 for (proc=0, k=0; proc<size; proc++) { 4771 if (!len_s[proc]) continue; 4772 i = owners[proc]; 4773 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4774 k++; 4775 } 4776 4777 /* receives and sends of j-structure are complete */ 4778 /*------------------------------------------------*/ 4779 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4780 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4781 4782 /* send and recv i-structure */ 4783 /*---------------------------*/ 4784 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4785 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4786 4787 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4788 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4789 for (proc=0,k=0; proc<size; proc++) { 4790 if (!len_s[proc]) continue; 4791 /* form outgoing message for i-structure: 4792 buf_si[0]: nrows to be sent 4793 [1:nrows]: row index (global) 4794 [nrows+1:2*nrows+1]: i-structure index 4795 */ 4796 /*-------------------------------------------*/ 4797 nrows = len_si[proc]/2 - 1; 4798 buf_si_i = buf_si + nrows+1; 4799 buf_si[0] = nrows; 4800 buf_si_i[0] = 0; 4801 nrows = 0; 4802 for (i=owners[proc]; i<owners[proc+1]; i++) { 4803 anzi = ai[i+1] - ai[i]; 4804 if (anzi) { 4805 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4806 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4807 nrows++; 4808 } 4809 } 4810 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4811 k++; 4812 buf_si += len_si[proc]; 4813 } 4814 4815 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4816 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4817 4818 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4819 for (i=0; i<merge->nrecv; i++) { 4820 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4821 } 4822 4823 ierr = PetscFree(len_si);CHKERRQ(ierr); 4824 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4825 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4826 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4827 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4828 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4829 ierr = PetscFree(status);CHKERRQ(ierr); 4830 4831 /* compute a local seq matrix in each processor */ 4832 /*----------------------------------------------*/ 4833 /* allocate bi array and free space for accumulating nonzero column info */ 4834 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4835 bi[0] = 0; 4836 4837 /* create and initialize a linked list */ 4838 nlnk = N+1; 4839 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4840 4841 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4842 len = ai[owners[rank+1]] - ai[owners[rank]]; 4843 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4844 4845 
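  /*
     The merge loop below combines, for each local row, the sorted column indices of this
     process's seqmat with those received from other processes. PetscLLAddSorted() inserts
     only indices not already in the linked list and returns the number added in nlnk, so
     bnzi accumulates the size of the union; e.g., merging local columns {0,3,5} with
     received columns {3,4} gives {0,3,4,5} and bnzi = 4. When the current free-space block
     cannot hold a row, PetscFreeSpaceGet() chains a new block sized bnzi plus everything
     allocated so far, roughly doubling capacity (nspacedouble counts these events).
  */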
current_space = free_space; 4846 4847 /* determine symbolic info for each local row */ 4848 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4849 4850 for (k=0; k<merge->nrecv; k++) { 4851 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4852 nrows = *buf_ri_k[k]; 4853 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4854 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4855 } 4856 4857 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4858 len = 0; 4859 for (i=0; i<m; i++) { 4860 bnzi = 0; 4861 /* add local non-zero cols of this proc's seqmat into lnk */ 4862 arow = owners[rank] + i; 4863 anzi = ai[arow+1] - ai[arow]; 4864 aj = a->j + ai[arow]; 4865 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4866 bnzi += nlnk; 4867 /* add received col data into lnk */ 4868 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4869 if (i == *nextrow[k]) { /* i-th row */ 4870 anzi = *(nextai[k]+1) - *nextai[k]; 4871 aj = buf_rj[k] + *nextai[k]; 4872 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4873 bnzi += nlnk; 4874 nextrow[k]++; nextai[k]++; 4875 } 4876 } 4877 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4878 4879 /* if free space is not available, make more free space */ 4880 if (current_space->local_remaining<bnzi) { 4881 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4882 nspacedouble++; 4883 } 4884 /* copy data into free space, then initialize lnk */ 4885 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4886 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4887 4888 current_space->array += bnzi; 4889 current_space->local_used += bnzi; 4890 current_space->local_remaining -= bnzi; 4891 4892 bi[i+1] = bi[i] + bnzi; 4893 } 4894 4895 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4896 4897 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4898 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4899 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4900 4901 /* create symbolic parallel matrix B_mpi */ 4902 /*---------------------------------------*/ 4903 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4904 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4905 if (n==PETSC_DECIDE) { 4906 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4907 } else { 4908 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4909 } 4910 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4911 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4912 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4913 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4914 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4915 4916 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4917 B_mpi->assembled = PETSC_FALSE; 4918 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4919 merge->bi = bi; 4920 merge->bj = bj; 4921 merge->buf_ri = buf_ri; 4922 merge->buf_rj = buf_rj; 4923 merge->coi = NULL; 4924 merge->coj = NULL; 4925 merge->owners_co = NULL; 4926 4927 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4928 4929 /* attach the supporting struct to B_mpi for reuse */ 4930 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
. 
A_loc - the local sequential matrix generated 5006 5007 Level: developer 5008 5009 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5010 5011 @*/ 5012 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5013 { 5014 PetscErrorCode ierr; 5015 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5016 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5017 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5018 MatScalar *aa=a->a,*ba=b->a,*cam; 5019 PetscScalar *ca; 5020 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5021 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5022 PetscBool match; 5023 5024 PetscFunctionBegin; 5025 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5026 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5027 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5028 if (scall == MAT_INITIAL_MATRIX) { 5029 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5030 ci[0] = 0; 5031 for (i=0; i<am; i++) { 5032 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5033 } 5034 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5035 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5036 k = 0; 5037 for (i=0; i<am; i++) { 5038 ncols_o = bi[i+1] - bi[i]; 5039 ncols_d = ai[i+1] - ai[i]; 5040 /* off-diagonal portion of A */ 5041 for (jo=0; jo<ncols_o; jo++) { 5042 col = cmap[*bj]; 5043 if (col >= cstart) break; 5044 cj[k] = col; bj++; 5045 ca[k++] = *ba++; 5046 } 5047 /* diagonal portion of A */ 5048 for (j=0; j<ncols_d; j++) { 5049 cj[k] = cstart + *aj++; 5050 ca[k++] = *aa++; 5051 } 5052 /* off-diagonal portion of A */ 5053 for (j=jo; j<ncols_o; j++) { 5054 cj[k] = cmap[*bj++]; 5055 ca[k++] = *ba++; 5056 } 5057 } 5058 /* put together the new matrix */ 5059 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5060 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5061 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5062 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5063 mat->free_a = PETSC_TRUE; 5064 mat->free_ij = PETSC_TRUE; 5065 mat->nonew = 0; 5066 } else if (scall == MAT_REUSE_MATRIX) { 5067 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5068 ci = mat->i; cj = mat->j; cam = mat->a; 5069 for (i=0; i<am; i++) { 5070 /* off-diagonal portion of A */ 5071 ncols_o = bi[i+1] - bi[i]; 5072 for (jo=0; jo<ncols_o; jo++) { 5073 col = cmap[*bj]; 5074 if (col >= cstart) break; 5075 *cam++ = *ba++; bj++; 5076 } 5077 /* diagonal portion of A */ 5078 ncols_d = ai[i+1] - ai[i]; 5079 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5080 /* off-diagonal portion of A */ 5081 for (j=jo; j<ncols_o; j++) { 5082 *cam++ = *ba++; bj++; 5083 } 5084 } 5085 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5086 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5087 PetscFunctionReturn(0); 5088 } 5089 5090 #undef __FUNCT__ 5091 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5092 /*@C 5093 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5094 5095 Not Collective 5096 5097 Input Parameters: 5098 + A - the matrix 5099 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5100 - row, col - index sets of rows and columns to extract (or NULL) 5101 5102 Output Parameter: 5103 . 
A_loc - the local sequential matrix generated 5104 5105 Level: developer 5106 5107 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5108 5109 @*/ 5110 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5111 { 5112 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5113 PetscErrorCode ierr; 5114 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5115 IS isrowa,iscola; 5116 Mat *aloc; 5117 PetscBool match; 5118 5119 PetscFunctionBegin; 5120 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5121 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5122 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5123 if (!row) { 5124 start = A->rmap->rstart; end = A->rmap->rend; 5125 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5126 } else { 5127 isrowa = *row; 5128 } 5129 if (!col) { 5130 start = A->cmap->rstart; 5131 cmap = a->garray; 5132 nzA = a->A->cmap->n; 5133 nzB = a->B->cmap->n; 5134 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5135 ncols = 0; 5136 for (i=0; i<nzB; i++) { 5137 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5138 else break; 5139 } 5140 imark = i; 5141 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5142 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5143 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5144 } else { 5145 iscola = *col; 5146 } 5147 if (scall != MAT_INITIAL_MATRIX) { 5148 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5149 aloc[0] = *A_loc; 5150 } 5151 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5152 *A_loc = aloc[0]; 5153 ierr = PetscFree(aloc);CHKERRQ(ierr); 5154 if (!row) { 5155 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5156 } 5157 if (!col) { 5158 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5159 } 5160 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5161 PetscFunctionReturn(0); 5162 } 5163 5164 #undef __FUNCT__ 5165 #define __FUNCT__ "MatGetBrowsOfAcols" 5166 /*@C 5167 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5168 5169 Collective on Mat 5170 5171 Input Parameters: 5172 + A,B - the matrices in mpiaij format 5173 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5174 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5175 5176 Output Parameter: 5177 + rowb, colb - index sets of rows and columns of B to extract 5178 - B_seq - the sequential matrix generated 5179 5180 Level: developer 5181 5182 @*/ 5183 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5184 { 5185 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5186 PetscErrorCode ierr; 5187 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5188 IS isrowb,iscolb; 5189 Mat *bseq=NULL; 5190 5191 PetscFunctionBegin; 5192 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5193 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5194 } 5195 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5196 5197 if (scall == MAT_INITIAL_MATRIX) { 5198 start = A->cmap->rstart; 5199 cmap = a->garray; 5200 nzA = a->A->cmap->n; 5201 nzB = a->B->cmap->n; 5202 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5203 ncols = 0; 5204 for (i=0; i<nzB; i++) { /* row < local row index */ 5205 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5206 else break; 5207 } 5208 imark = i; 5209 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5210 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5211 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5212 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5213 } else { 5214 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5215 isrowb = *rowb; iscolb = *colb; 5216 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5217 bseq[0] = *B_seq; 5218 } 5219 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5220 *B_seq = bseq[0]; 5221 ierr = PetscFree(bseq);CHKERRQ(ierr); 5222 if (!rowb) { 5223 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5224 } else { 5225 *rowb = isrowb; 5226 } 5227 if (!colb) { 5228 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5229 } else { 5230 *colb = iscolb; 5231 } 5232 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5233 PetscFunctionReturn(0); 5234 } 5235 5236 #undef __FUNCT__ 5237 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5238 /* 5239 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5240 of the OFF-DIAGONAL portion of local A 5241 5242 Collective on Mat 5243 5244 Input Parameters: 5245 + A,B - the matrices in mpiaij format 5246 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5247 5248 Output Parameter: 5249 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5250 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5251 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5252 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5253 5254 Level: developer 5255 5256 */ 5257 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5258 { 5259 VecScatter_MPI_General *gen_to,*gen_from; 5260 PetscErrorCode ierr; 5261 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5262 Mat_SeqAIJ *b_oth; 5263 VecScatter ctx =a->Mvctx; 5264 MPI_Comm comm; 5265 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5266 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5267 PetscScalar *rvalues,*svalues; 5268 MatScalar *b_otha,*bufa,*bufA; 5269 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5270 MPI_Request *rwaits = NULL,*swaits = NULL; 5271 MPI_Status *sstatus,rstatus; 5272 PetscMPIInt jj; 5273 PetscInt *cols,sbs,rbs; 5274 PetscScalar *vals; 5275 5276 PetscFunctionBegin; 5277 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5278 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5279 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5280 } 5281 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5282 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5283 5284 gen_to = (VecScatter_MPI_General*)ctx->todata; 5285 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5286 rvalues = gen_from->values; /* holds the length of receiving row */ 5287 svalues = gen_to->values; /* holds the length of sending row */ 5288 nrecvs = gen_from->n; 5289 nsends = gen_to->n; 5290 5291 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5292 srow = gen_to->indices; /* local row index to be sent */ 5293 sstarts = gen_to->starts; 5294 sprocs = gen_to->procs; 5295 sstatus = gen_to->sstatus; 5296 sbs = gen_to->bs; 5297 rstarts = gen_from->starts; 5298 rprocs = gen_from->procs; 5299 rbs = gen_from->bs; 5300 5301 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5302 if (scall == MAT_INITIAL_MATRIX) { 5303 /* i-array */ 5304 /*---------*/ 5305 /* post receives */ 5306 for (i=0; i<nrecvs; i++) { 5307 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5308 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5309 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5310 } 5311 5312 /* pack the outgoing message */ 5313 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5314 5315 sstartsj[0] = 0; 5316 rstartsj[0] = 0; 5317 len = 0; /* total length of j or a array to be sent */ 5318 k = 0; 5319 for (i=0; i<nsends; i++) { 5320 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5321 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5322 for (j=0; j<nrows; j++) { 5323 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5324 for (l=0; l<sbs; l++) { 5325 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5326 5327 rowlen[j*sbs+l] = ncols; 5328 5329 len += ncols; 5330 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5331 } 5332 k++; 5333 } 5334 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5335 5336 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5337 } 5338 /* recvs and sends of i-array are completed */ 5339 i = nrecvs; 5340 while (i--) { 5341 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5342 } 5343 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5344 5345 /* allocate buffers for sending j and a arrays */ 5346 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5347 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5348 5349 /* create i-array of B_oth */ 5350 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5351 5352 b_othi[0] = 0; 5353 len = 0; /* total length of j or a array to be received */ 5354 k = 0; 5355 for (i=0; i<nrecvs; i++) { 5356 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5357 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5358 for (j=0; j<nrows; j++) { 5359 b_othi[k+1] = b_othi[k] + rowlen[j]; 5360 len += rowlen[j]; k++; 5361 } 5362 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5363 } 5364 5365 /* allocate space for j and a arrrays of B_oth */ 5366 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5367 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5368 5369 /* j-array */ 5370 /*---------*/ 5371 /* post receives of j-array */ 5372 for (i=0; i<nrecvs; i++) { 5373 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5374 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5375 } 5376 5377 /* pack the outgoing message j-array */ 5378 k = 0; 5379 for (i=0; i<nsends; i++) { 5380 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5381 bufJ = bufj+sstartsj[i]; 5382 for (j=0; j<nrows; j++) { 5383 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5384 for (ll=0; ll<sbs; ll++) { 5385 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5386 for (l=0; l<ncols; l++) { 5387 *bufJ++ = cols[l]; 5388 } 5389 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5390 } 5391 } 5392 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5393 } 5394 5395 /* recvs and sends of j-array are completed */ 5396 i = nrecvs; 5397 while (i--) { 5398 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5399 } 5400 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5401 } else if (scall == MAT_REUSE_MATRIX) { 5402 sstartsj = *startsj_s; 5403 rstartsj = *startsj_r; 5404 bufa = *bufa_ptr; 5405 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5406 b_otha = b_oth->a; 5407 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5408 5409 /* a-array */ 5410 /*---------*/ 5411 /* post receives of a-array */ 5412 for (i=0; i<nrecvs; i++) { 5413 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5414 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5415 } 5416 5417 /* pack the outgoing message a-array */ 5418 k = 0; 5419 for (i=0; i<nsends; i++) { 5420 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5421 bufA = bufa+sstartsj[i]; 5422 for (j=0; j<nrows; j++) { 5423 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5424 for (ll=0; ll<sbs; ll++) { 5425 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5426 for (l=0; l<ncols; l++) { 5427 *bufA++ = vals[l]; 5428 } 5429 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5430 } 5431 } 5432 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5433 } 5434 /* recvs and sends of a-array are completed */ 5435 i = nrecvs; 5436 while (i--) { 5437 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5438 } 5439 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5440 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5441 5442 if (scall == MAT_INITIAL_MATRIX) { 5443 /* put together the new matrix */ 5444 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5445 5446 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5447 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5448 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5449 b_oth->free_a = PETSC_TRUE; 5450 b_oth->free_ij = PETSC_TRUE; 5451 b_oth->nonew = 0; 5452 5453 ierr = PetscFree(bufj);CHKERRQ(ierr); 5454 if (!startsj_s || !bufa_ptr) { 5455 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5456 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5457 } else { 5458 *startsj_s = sstartsj; 5459 *startsj_r = rstartsj; 5460 *bufa_ptr = bufa; 5461 } 5462 } 5463 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5464 PetscFunctionReturn(0); 5465 } 5466 5467 #undef __FUNCT__ 5468 #define __FUNCT__ "MatGetCommunicationStructs" 5469 /*@C 5470 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5471 5472 Not Collective 5473 5474 Input Parameters: 5475 . A - The matrix in mpiaij format 5476 5477 Output Parameter: 5478 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5479 . 
colmap - A map from global column index to local index into lvec 5480 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5481 5482 Level: developer 5483 5484 @*/ 5485 #if defined(PETSC_USE_CTABLE) 5486 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5487 #else 5488 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5489 #endif 5490 { 5491 Mat_MPIAIJ *a; 5492 5493 PetscFunctionBegin; 5494 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5495 PetscValidPointer(lvec, 2); 5496 PetscValidPointer(colmap, 3); 5497 PetscValidPointer(multScatter, 4); 5498 a = (Mat_MPIAIJ*) A->data; 5499 if (lvec) *lvec = a->lvec; 5500 if (colmap) *colmap = a->colmap; 5501 if (multScatter) *multScatter = a->Mvctx; 5502 PetscFunctionReturn(0); 5503 } 5504 5505 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5506 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5507 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5508 5509 #undef __FUNCT__ 5510 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5511 /* 5512 Computes (B'*A')' since computing B*A directly is untenable 5513 5514 n p p 5515 ( ) ( ) ( ) 5516 m ( A ) * n ( B ) = m ( C ) 5517 ( ) ( ) ( ) 5518 5519 */ 5520 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5521 { 5522 PetscErrorCode ierr; 5523 Mat At,Bt,Ct; 5524 5525 PetscFunctionBegin; 5526 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5527 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5528 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5529 ierr = MatDestroy(&At);CHKERRQ(ierr); 5530 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5531 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5532 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5533 PetscFunctionReturn(0); 5534 } 5535 5536 #undef __FUNCT__ 5537 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5538 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5539 { 5540 PetscErrorCode ierr; 5541 PetscInt m=A->rmap->n,n=B->cmap->n; 5542 Mat Cmat; 5543 5544 PetscFunctionBegin; 5545 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5546 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5547 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5548 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5549 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5550 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5551 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5552 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5553 5554 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5555 5556 *C = Cmat; 5557 PetscFunctionReturn(0); 5558 } 5559 5560 /* ----------------------------------------------------------------*/ 5561 #undef __FUNCT__ 5562 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5563 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5564 { 5565 PetscErrorCode ierr; 5566 5567 PetscFunctionBegin; 5568 if (scall == MAT_INITIAL_MATRIX) { 5569 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5570 } 5571 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5572 
PetscFunctionReturn(0); 5573 } 5574 5575 #if defined(PETSC_HAVE_MUMPS) 5576 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5577 #endif 5578 #if defined(PETSC_HAVE_PASTIX) 5579 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5580 #endif 5581 #if defined(PETSC_HAVE_SUPERLU_DIST) 5582 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5583 #endif 5584 #if defined(PETSC_HAVE_CLIQUE) 5585 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5586 #endif 5587 5588 /*MC 5589 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5590 5591 Options Database Keys: 5592 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5593 5594 Level: beginner 5595 5596 .seealso: MatCreateAIJ() 5597 M*/ 5598 5599 #undef __FUNCT__ 5600 #define __FUNCT__ "MatCreate_MPIAIJ" 5601 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5602 { 5603 Mat_MPIAIJ *b; 5604 PetscErrorCode ierr; 5605 PetscMPIInt size; 5606 5607 PetscFunctionBegin; 5608 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5609 5610 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5611 B->data = (void*)b; 5612 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5613 B->assembled = PETSC_FALSE; 5614 B->insertmode = NOT_SET_VALUES; 5615 b->size = size; 5616 5617 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5618 5619 /* build cache for off array entries formed */ 5620 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5621 5622 b->donotstash = PETSC_FALSE; 5623 b->colmap = 0; 5624 b->garray = 0; 5625 b->roworiented = PETSC_TRUE; 5626 5627 /* stuff used for matrix vector multiply */ 5628 b->lvec = NULL; 5629 b->Mvctx = NULL; 5630 5631 /* stuff for MatGetRow() */ 5632 b->rowindices = 0; 5633 b->rowvalues = 0; 5634 b->getrowactive = PETSC_FALSE; 5635 5636 /* flexible pointer used in CUSP/CUSPARSE classes */ 5637 b->spptr = NULL; 5638 5639 #if defined(PETSC_HAVE_MUMPS) 5640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_aij_mumps",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5641 #endif 5642 #if defined(PETSC_HAVE_PASTIX) 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C","MatGetFactor_mpiaij_pastix",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5644 #endif 5645 #if defined(PETSC_HAVE_SUPERLU_DIST) 5646 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C","MatGetFactor_mpiaij_superlu_dist",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5647 #endif 5648 #if defined(PETSC_HAVE_CLIQUE) 5649 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C","MatGetFactor_aij_clique",MatGetFactor_aij_clique);CHKERRQ(ierr); 5650 #endif 5651 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIAIJ",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5652 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIAIJ",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIAIJ",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5654 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_MPIAIJ",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5655 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJ",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C","MatMPIAIJSetPreallocationCSR_MPIAIJ",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5657 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIAIJ",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C","MatConvert_MPIAIJ_MPIAIJPERM",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C","MatConvert_MPIAIJ_MPIAIJCRL",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C","MatConvert_MPIAIJ_MPISBAIJ",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C","MatMatMult_MPIDense_MPIAIJ",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C","MatMatMultSymbolic_MPIDense_MPIAIJ",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C","MatMatMultNumeric_MPIDense_MPIAIJ",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5664 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5665 PetscFunctionReturn(0); 5666 } 5667 5668 #undef __FUNCT__ 5669 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5670 /*@ 5671 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5672 and "off-diagonal" part of the matrix in CSR format. 5673 5674 Collective on MPI_Comm 5675 5676 Input Parameters: 5677 + comm - MPI communicator 5678 . m - number of local rows (Cannot be PETSC_DECIDE) 5679 . n - This value should be the same as the local size used in creating the 5680 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5681 calculated if N is given) For square matrices n is almost always m. 5682 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5683 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5684 . i - row indices for "diagonal" portion of matrix 5685 . j - column indices 5686 . a - matrix values 5687 . oi - row indices for "off-diagonal" portion of matrix 5688 . oj - column indices 5689 - oa - matrix values 5690 5691 Output Parameter: 5692 . mat - the matrix 5693 5694 Level: advanced 5695 5696 Notes: 5697 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5698 must free the arrays once the matrix has been destroyed and not before. 5699 5700 The i and j indices are 0 based 5701 5702 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5703 5704 This sets local rows and cannot be used to set off-processor values. 5705 5706 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5707 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5708 not easily support in-place reassembly. 
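   For illustration only (a made-up layout, not taken from an application): a process owning
   rows 0-1 and columns 0-1 of a matrix with 3 global columns, whose local rows are
   [1 0 | 2] and [0 3 | 0], would pass
.vb
     i  = {0,1,2}   j  = {0,1}   a  = {1,3}    (diagonal block, local column indices)
     oi = {0,1,1}   oj = {2}     oa = {2}      (off-diagonal block, global column indices)
.ve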
It is recommended to use MatSetValues() (or a variant thereof) because 5709 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5710 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5711 communication if it is known that only local entries will be set. 5712 5713 .keywords: matrix, aij, compressed row, sparse, parallel 5714 5715 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5716 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5717 @*/ 5718 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5719 { 5720 PetscErrorCode ierr; 5721 Mat_MPIAIJ *maij; 5722 5723 PetscFunctionBegin; 5724 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5725 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5726 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5727 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5728 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5729 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5730 maij = (Mat_MPIAIJ*) (*mat)->data; 5731 5732 (*mat)->preallocated = PETSC_TRUE; 5733 5734 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5735 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5736 5737 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5738 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5739 5740 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5741 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5742 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5743 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5744 5745 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5746 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5747 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5748 PetscFunctionReturn(0); 5749 } 5750 5751 /* 5752 Special version for direct calls from Fortran 5753 */ 5754 #include <petsc-private/fortranimpl.h> 5755 5756 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5757 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5758 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5759 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5760 #endif 5761 5762 /* Change these macros so can be used in void function */ 5763 #undef CHKERRQ 5764 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5765 #undef SETERRQ2 5766 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5767 #undef SETERRQ3 5768 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5769 #undef SETERRQ 5770 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5771 5772 #undef __FUNCT__ 5773 #define __FUNCT__ "matsetvaluesmpiaij_" 5774 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5775 { 5776 Mat mat = *mmat; 5777 PetscInt m = *mm, n = *mn; 5778 InsertMode addv = *maddv; 5779 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5780 PetscScalar value; 5781 PetscErrorCode ierr; 
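  /*
     The body below mirrors MatSetValues_MPIAIJ(): off-process rows are stashed, and local
     entries are inserted into the diagonal (A) and off-diagonal (B) blocks through the
     MatSetValues_SeqAIJ_A_Private()/MatSetValues_SeqAIJ_B_Private() macros. It is a void
     function so Fortran can call it directly; the CHKERRQ/SETERRQ macros redefined above
     make errors abort rather than return an error code.
  */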

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   note that ba must be refreshed BEFORE ap2 is computed, so that ap2
                   points into the new B, not into the array freed by the disassembly */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
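
/*
   A minimal sketch (illustration only, not part of the library) of the MatSetValues()-based
   assembly recommended in the notes of MatCreateMPIAIJWithSplitArrays() above. The names
   comm, nlocal, ncols, cols, and vals, and the preallocation counts (5 diagonal, 2
   off-diagonal nonzeros per row), are placeholders for this example.

     Mat      A;
     PetscInt row,rstart,rend;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,nlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         (takes effect on one process)
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  (takes effect on several)

     (only safe when each process sets exclusively the rows it owns)
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       (compute ncols, cols[], vals[] for this row, then insert it)
       ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/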