#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type
   also automatically switches over to using inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
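
/*
   Illustrative sketch (not part of the original source): creating a square AIJ matrix and
   calling both preallocation routines, as the MATAIJ man page above recommends, so the same
   code works for one-process and multi-process communicators. The local size m, global size
   M, and the per-row nonzero estimates (5 diagonal, 2 off-diagonal) are placeholders.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleCreateAIJ"
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt M,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            C;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&C);CHKERRQ(ierr);
  ierr = MatSetSizes(C,m,m,M,M);CHKERRQ(ierr);
  ierr = MatSetType(C,MATAIJ);CHKERRQ(ierr);                       /* or -mat_type aij with MatSetFromOptions() */
  ierr = MatSeqAIJSetPreallocation(C,5,NULL);CHKERRQ(ierr);        /* used when comm has a single process */
  ierr = MatMPIAIJSetPreallocation(C,5,NULL,2,NULL);CHKERRQ(ierr); /* used when comm has several processes */

  *newmat = C;
  PetscFunctionReturn(0);
}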

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
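
/*
   Illustrative sketch (not part of the original source): calling the public MatGetColumnNorms()
   interface, which dispatches to the routine above for MPIAIJ matrices. Every process receives
   all N column norms, so the output array has global length.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleColumnNorms"
static PetscErrorCode ExampleColumnNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       N;
  PetscReal      *norms;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
  ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);   /* NORM_1 and NORM_INFINITY also supported */
  ierr = PetscFree(norms);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}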

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the numbers of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the numbers of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
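
/*
   Illustrative sketch (not part of the original source): the lookup that the colmap supports.
   Without PETSC_USE_CTABLE, colmap is a dense array of global length storing local indices
   shifted by one (zero meaning "column not present on this process"), so a global-to-local
   query is a single array access, mirroring the "col = aij->colmap[in[j]] - 1" idiom used below.
*/
static PetscInt ExampleColmapLookup(const PetscInt *colmap,PetscInt globalcol)
{
  return colmap[globalcol] - 1;   /* returns -1 when the column is absent locally */
}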

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2)  low2 = 0;     \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
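
/*
   Illustrative sketch (not part of the original source): the search strategy shared by the two
   macros above, written as a standalone function over a sorted index array. A bisection loop
   narrows the window to at most five entries, then a short linear scan finishes; the return
   value is the position of col if present, otherwise the point where it should be inserted.
*/
static PetscInt ExampleRowSearch(const PetscInt *rp,PetscInt nrow,PetscInt col)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 5) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] >= col) break;   /* found col, or passed the spot where it belongs */
  }
  return i;
}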

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
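
/*
   Illustrative sketch (not part of the original source): the standard way the routine above is
   reached, namely through MatSetValues() with global indices followed by the assembly pair.
   Entries for rows owned by other processes are stashed and communicated during assembly.
   Setting only the diagonal here is a placeholder pattern.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleSetValues"
static PetscErrorCode ExampleSetValues(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,row;
  PetscScalar    one = 1.0;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatSetValues(mat,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}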

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
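
/*
   Illustrative sketch (not part of the original source): retrieving entries through
   MatGetValues(). As the error branch above enforces, only locally owned rows may be queried,
   so the requested entry is taken from this process's ownership range. The matrix is assumed
   to be assembled.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleGetValues"
static PetscErrorCode ExampleGetValues(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;
  PetscScalar    v;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  if (rend > rstart) {
    ierr = MatGetValues(mat,1,&rstart,1,&rstart,&v);CHKERRQ(ierr);   /* first locally owned diagonal entry */
  }
  PetscFunctionReturn(0);
}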

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
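
/*
   Illustrative sketch (not part of the original source): the insert-mode check in
   MatAssemblyBegin_MPIAIJ() above means INSERT_VALUES and ADD_VALUES phases must be separated
   by an assembly; MAT_FLUSH_ASSEMBLY is the cheap way to do that. The row/col arguments here
   are placeholders for locally owned indices.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleMixedInsertAdd"
static PetscErrorCode ExampleMixedInsertAdd(Mat mat,PetscInt row,PetscInt col)
{
  PetscErrorCode ierr;
  PetscScalar    v = 1.0;

  PetscFunctionBegin;
  ierr = MatSetValues(mat,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);   /* flush before switching modes */
  ierr = MatAssemblyEnd(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}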

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
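
/*
   Illustrative sketch (not part of the original source): applying a Dirichlet-style boundary
   condition through the public MatZeroRows() interface, which reaches the routine above for
   MPIAIJ. Rows listed may be owned by other processes. Passing x and b also fixes the
   right-hand side so that b[row] = diag*x[row] for each zeroed row; both may be NULL.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleZeroRows"
static PetscErrorCode ExampleZeroRows(Mat A,PetscInt nrows,const PetscInt rows[],Vec x,Vec b)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);   /* place 1.0 on the zeroed diagonals */
  PetscFunctionReturn(0);
}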

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
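
/*
   Illustrative sketch (not part of the original source): a matrix-vector product with
   layout-compatible vectors obtained from MatGetVecs(). Internally, MatMult_MPIAIJ() above
   overlaps the scatter of the off-process entries of x with the product against the local
   diagonal block.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleMatMult"
static PetscErrorCode ExampleMatMult(Mat A)
{
  PetscErrorCode ierr;
  Vec            x,y;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);   /* x matches the column layout, y the row layout */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);        /* y now holds the row sums of A */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}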

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
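
/*
   Illustrative sketch (not part of the original source): querying whether B equals the
   transpose of A to a tolerance, through the public MatIsTranspose() interface that dispatches
   to MatIsTranspose_MPIAIJ() above. The tolerance value is a placeholder.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleIsTranspose"
static PetscErrorCode ExampleIsTranspose(Mat A,Mat B)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);   /* flg is PETSC_TRUE when B == A^T within tol */
  PetscFunctionReturn(0);
}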

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  PetscFunctionReturn(0);
}
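
/*
   Illustrative sketch (not part of the original source): saving a matrix with the binary
   viewer, which invokes MatView_MPIAIJ_Binary() above for a parallel AIJ matrix; a matrix
   saved this way can be read back with MatLoad(). The file name is a placeholder.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleSaveBinary"
static PetscErrorCode ExampleSaveBinary(Mat mat)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)mat),"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(mat,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}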
PetscViewer sviewer; 1452 PetscViewerFormat format; 1453 1454 PetscFunctionBegin; 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1457 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1458 if (iascii) { 1459 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1460 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1461 MatInfo info; 1462 PetscInt *inodes; 1463 1464 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1465 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1466 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1468 if (!inodes) { 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1470 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1471 } else { 1472 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1473 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1474 } 1475 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1477 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1479 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1481 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1482 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1485 PetscInt inodecount,inodelimit,*inodes; 1486 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1487 if (inodes) { 1488 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1489 } else { 1490 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1491 } 1492 PetscFunctionReturn(0); 1493 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1494 PetscFunctionReturn(0); 1495 } 1496 } else if (isbinary) { 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1502 } 1503 PetscFunctionReturn(0); 1504 } else if (isdraw) { 1505 PetscDraw draw; 1506 PetscBool isnull; 1507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1509 } 1510 1511 if (size == 1) { 1512 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1513 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1514 } else { 1515 /* assemble the entire matrix onto the first processor.
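         Roughly: every process contributes its rows to a temporary MATMPIAIJ matrix whose
         local size is (M,N) on process 0 and (0,0) everywhere else, so after assembly
         process 0 holds the whole matrix in its local diagonal block and can view it
         sequentially through the singleton viewer. This duplicates the entire matrix on
         one process, which is why the 1024-row guard below exists.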
*/ 1516 Mat A; 1517 Mat_SeqAIJ *Aloc; 1518 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1519 MatScalar *a; 1520 1521 if (mat->rmap->N > 1024) { 1522 PetscBool flg = PETSC_FALSE; 1523 1524 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1525 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1526 } 1527 1528 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1529 if (!rank) { 1530 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1531 } else { 1532 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1533 } 1534 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1535 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1536 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1537 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1538 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1539 1540 /* copy over the A part */ 1541 Aloc = (Mat_SeqAIJ*)aij->A->data; 1542 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1545 for (i=0; i<m; i++) { 1546 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1547 row++; 1548 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1549 } 1550 aj = Aloc->j; 1551 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1552 1553 /* copy over the B part */ 1554 Aloc = (Mat_SeqAIJ*)aij->B->data; 1555 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1556 row = mat->rmap->rstart; 1557 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1558 ct = cols; 1559 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1560 for (i=0; i<m; i++) { 1561 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1562 row++; 1563 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1564 } 1565 ierr = PetscFree(ct);CHKERRQ(ierr); 1566 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1568 /* 1569 Everyone has to call to draw the matrix since the graphics waits are 1570 synchronized across all processors that share the PetscDraw object 1571 */ 1572 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 if (!rank) { 1574 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1575 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1576 ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr); 1577 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1578 } 1579 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1580 ierr = MatDestroy(&A);CHKERRQ(ierr); 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatView_MPIAIJ" 1587 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1588 { 1589 PetscErrorCode ierr; 1590 PetscBool iascii,isdraw,issocket,isbinary; 1591 1592 PetscFunctionBegin; 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1594 ierr =
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1596 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1597 if (iascii || isdraw || isbinary || issocket) { 1598 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1599 } 1600 PetscFunctionReturn(0); 1601 } 1602 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "MatSOR_MPIAIJ" 1605 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1606 { 1607 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1608 PetscErrorCode ierr; 1609 Vec bb1 = 0; 1610 PetscBool hasop; 1611 1612 PetscFunctionBegin; 1613 if (flag == SOR_APPLY_UPPER) { 1614 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1619 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1620 } 1621 1622 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1623 if (flag & SOR_ZERO_INITIAL_GUESS) { 1624 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1625 its--; 1626 } 1627 1628 while (its--) { 1629 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1631 1632 /* update rhs: bb1 = bb - B*x */ 1633 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1634 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1635 1636 /* local sweep */ 1637 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1638 } 1639 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 while (its--) { 1645 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 1648 /* update rhs: bb1 = bb - B*x */ 1649 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1650 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1651 1652 /* local sweep */ 1653 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1654 } 1655 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1656 if (flag & SOR_ZERO_INITIAL_GUESS) { 1657 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1658 its--; 1659 } 1660 while (its--) { 1661 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 1664 /* update rhs: bb1 = bb - B*x */ 1665 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1666 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1667 1668 /* local sweep */ 1669 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1670 } 1671 } else if (flag & SOR_EISENSTAT) { 1672 Vec xx1; 1673 1674 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1675 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1676 
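    /*
       A sketch of the Eisenstat steps below: the local backward sweep above produced an
       initial xx; the scatter fills mat->lvec with the ghost entries of xx needed by the
       off-diagonal block B, the modified right-hand side
           bb1 = bb + ((omega-2)/omega)*D*xx + B*xx_ghost
       is assembled, and a zero-initial-guess local forward sweep on bb1 yields the
       correction xx1, which is added back into xx.
    */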
1677 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 if (!mat->diag) { 1680 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1681 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1682 } 1683 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1684 if (hasop) { 1685 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1686 } else { 1687 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1688 } 1689 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1690 1691 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1692 1693 /* local sweep */ 1694 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1695 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1696 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1697 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1698 1699 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatPermute_MPIAIJ" 1705 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1706 { 1707 Mat aA,aB,Aperm; 1708 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1709 PetscScalar *aa,*ba; 1710 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1711 PetscSF rowsf,sf; 1712 IS parcolp = NULL; 1713 PetscBool done; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1718 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1719 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1720 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1721 1722 /* Invert row permutation to find out where my rows should go */ 1723 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1724 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1725 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1726 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1727 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1728 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1729 1730 /* Invert column permutation to find out where my columns should go */ 1731 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1732 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1733 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1734 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1735 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1737 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1738 1739 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1740 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1741 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1742 1743 /* Find out where my gcols should go */ 1744 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1745 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1746 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1747 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1748 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1749 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1751 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1752 1753 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1754 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1757 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1758 for (i=0; i<m; i++) { 1759 PetscInt row = rdest[i],rowner; 1760 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1761 for (j=ai[i]; j<ai[i+1]; j++) { 1762 PetscInt cowner,col = cdest[aj[j]]; 1763 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1764 if (rowner == cowner) dnnz[i]++; 1765 else onnz[i]++; 1766 } 1767 for (j=bi[i]; j<bi[i+1]; j++) { 1768 PetscInt cowner,col = gcdest[bj[j]]; 1769 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1770 if (rowner == cowner) dnnz[i]++; 1771 else onnz[i]++; 1772 } 1773 } 1774 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1776 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1780 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1781 1782 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1784 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1785 for (i=0; i<m; i++) { 1786 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1787 PetscInt rowlen; 1788 rowlen = ai[i+1] - ai[i]; 1789 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1790 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1791 rowlen = bi[i+1] - bi[i]; 1792 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1793 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1794 } 1795 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1798 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1800 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1801 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1802 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1803 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1804 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1805 *B = Aperm; 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1811 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1812 { 1813 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1814 Mat A = mat->A,B = mat->B; 1815 PetscErrorCode ierr; 1816 PetscReal isend[5],irecv[5]; 1817 1818 PetscFunctionBegin; 1819 
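  /*
     The parallel matrix is stored as a diagonal block A and an off-diagonal block B, so the
     local statistics are the sums of the two blocks' MatInfo fields; for MAT_GLOBAL_MAX and
     MAT_GLOBAL_SUM the five floating-point fields are then combined across the communicator
     with a single MPI_Allreduce().
  */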
info->block_size = 1.0; 1820 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1821 1822 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1823 isend[3] = info->memory; isend[4] = info->mallocs; 1824 1825 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1826 1827 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1828 isend[3] += info->memory; isend[4] += info->mallocs; 1829 if (flag == MAT_LOCAL) { 1830 info->nz_used = isend[0]; 1831 info->nz_allocated = isend[1]; 1832 info->nz_unneeded = isend[2]; 1833 info->memory = isend[3]; 1834 info->mallocs = isend[4]; 1835 } else if (flag == MAT_GLOBAL_MAX) { 1836 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1837 1838 info->nz_used = irecv[0]; 1839 info->nz_allocated = irecv[1]; 1840 info->nz_unneeded = irecv[2]; 1841 info->memory = irecv[3]; 1842 info->mallocs = irecv[4]; 1843 } else if (flag == MAT_GLOBAL_SUM) { 1844 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1845 1846 info->nz_used = irecv[0]; 1847 info->nz_allocated = irecv[1]; 1848 info->nz_unneeded = irecv[2]; 1849 info->memory = irecv[3]; 1850 info->mallocs = irecv[4]; 1851 } 1852 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1853 info->fill_ratio_needed = 0; 1854 info->factor_mallocs = 0; 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatSetOption_MPIAIJ" 1860 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1861 { 1862 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1863 PetscErrorCode ierr; 1864 1865 PetscFunctionBegin; 1866 switch (op) { 1867 case MAT_NEW_NONZERO_LOCATIONS: 1868 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1869 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1870 case MAT_KEEP_NONZERO_PATTERN: 1871 case MAT_NEW_NONZERO_LOCATION_ERR: 1872 case MAT_USE_INODES: 1873 case MAT_IGNORE_ZERO_ENTRIES: 1874 MatCheckPreallocated(A,1); 1875 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1876 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1877 break; 1878 case MAT_ROW_ORIENTED: 1879 a->roworiented = flg; 1880 1881 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1882 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1883 break; 1884 case MAT_NEW_DIAGONALS: 1885 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1886 break; 1887 case MAT_IGNORE_OFF_PROC_ENTRIES: 1888 a->donotstash = flg; 1889 break; 1890 case MAT_SPD: 1891 A->spd_set = PETSC_TRUE; 1892 A->spd = flg; 1893 if (flg) { 1894 A->symmetric = PETSC_TRUE; 1895 A->structurally_symmetric = PETSC_TRUE; 1896 A->symmetric_set = PETSC_TRUE; 1897 A->structurally_symmetric_set = PETSC_TRUE; 1898 } 1899 break; 1900 case MAT_SYMMETRIC: 1901 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1902 break; 1903 case MAT_STRUCTURALLY_SYMMETRIC: 1904 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1905 break; 1906 case MAT_HERMITIAN: 1907 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1908 break; 1909 case MAT_SYMMETRY_ETERNAL: 1910 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1911 break; 1912 default: 1913 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatGetRow_MPIAIJ" 1920 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1921 { 1922 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1923 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1924 PetscErrorCode ierr; 1925 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1926 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1927 PetscInt *cmap,*idx_p; 1928 1929 PetscFunctionBegin; 1930 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1931 mat->getrowactive = PETSC_TRUE; 1932 1933 if (!mat->rowvalues && (idx || v)) { 1934 /* 1935 allocate enough space to hold information from the longest row. 1936 */ 1937 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1938 PetscInt max = 1,tmp; 1939 for (i=0; i<matin->rmap->n; i++) { 1940 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1941 if (max < tmp) max = tmp; 1942 } 1943 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1944 } 1945 1946 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1947 lrow = row - rstart; 1948 1949 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1950 if (!v) {pvA = 0; pvB = 0;} 1951 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1952 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1953 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1954 nztot = nzA + nzB; 1955 1956 cmap = mat->garray; 1957 if (v || idx) { 1958 if (nztot) { 1959 /* Sort by increasing column numbers, assuming A and B already sorted */ 1960 PetscInt imark = -1; 1961 if (v) { 1962 *v = v_p = mat->rowvalues; 1963 for (i=0; i<nzB; i++) { 1964 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1965 else break; 1966 } 1967 imark = i; 1968 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1969 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1970 } 1971 if (idx) { 1972 *idx = idx_p = mat->rowindices; 1973 if (imark > -1) { 1974 for (i=0; i<imark; i++) { 1975 idx_p[i] = cmap[cworkB[i]]; 1976 } 1977 } else { 1978 for (i=0; i<nzB; i++) { 1979 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1980 else break; 1981 } 1982 imark = i; 1983 } 1984 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1985 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1986 } 1987 } else { 1988 if (idx) *idx = 0; 1989 if (v) *v = 0; 1990 } 1991 } 1992 *nz = nztot; 1993 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1994 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2000 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2001 { 2002 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2003 2004 PetscFunctionBegin; 2005 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2006 aij->getrowactive = PETSC_FALSE; 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatNorm_MPIAIJ" 2012 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2013 { 2014 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2015 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2016 PetscErrorCode ierr; 2017 PetscInt i,j,cstart = mat->cmap->rstart; 2018 PetscReal sum = 0.0; 2019 MatScalar *v; 2020 2021 PetscFunctionBegin; 2022 if (aij->size == 1) { 2023 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2024 } else { 2025 if (type == NORM_FROBENIUS) { 2026 v = 
amat->a; 2027 for (i=0; i<amat->nz; i++) { 2028 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2029 } 2030 v = bmat->a; 2031 for (i=0; i<bmat->nz; i++) { 2032 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2033 } 2034 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2035 *norm = PetscSqrtReal(*norm); 2036 } else if (type == NORM_1) { /* max column norm */ 2037 PetscReal *tmp,*tmp2; 2038 PetscInt *jj,*garray = aij->garray; 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2040 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2041 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2042 *norm = 0.0; 2043 v = amat->a; jj = amat->j; 2044 for (j=0; j<amat->nz; j++) { 2045 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2046 } 2047 v = bmat->a; jj = bmat->j; 2048 for (j=0; j<bmat->nz; j++) { 2049 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2050 } 2051 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2052 for (j=0; j<mat->cmap->N; j++) { 2053 if (tmp2[j] > *norm) *norm = tmp2[j]; 2054 } 2055 ierr = PetscFree(tmp);CHKERRQ(ierr); 2056 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2057 } else if (type == NORM_INFINITY) { /* max row norm */ 2058 PetscReal ntemp = 0.0; 2059 for (j=0; j<aij->A->rmap->n; j++) { 2060 v = amat->a + amat->i[j]; 2061 sum = 0.0; 2062 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2063 sum += PetscAbsScalar(*v); v++; 2064 } 2065 v = bmat->a + bmat->i[j]; 2066 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2067 sum += PetscAbsScalar(*v); v++; 2068 } 2069 if (sum > ntemp) ntemp = sum; 2070 } 2071 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2072 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatTranspose_MPIAIJ" 2079 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2080 { 2081 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2082 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2083 PetscErrorCode ierr; 2084 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2085 PetscInt cstart = A->cmap->rstart,ncol; 2086 Mat B; 2087 MatScalar *array; 2088 2089 PetscFunctionBegin; 2090 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2091 2092 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2093 ai = Aloc->i; aj = Aloc->j; 2094 bi = Bloc->i; bj = Bloc->j; 2095 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2096 PetscInt *d_nnz,*g_nnz,*o_nnz; 2097 PetscSFNode *oloc; 2098 PETSC_UNUSED PetscSF sf; 2099 2100 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2101 /* compute d_nnz for preallocation */ 2102 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2103 for (i=0; i<ai[ma]; i++) { 2104 d_nnz[aj[i]]++; 2105 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2106 } 2107 /* compute local off-diagonal contributions */ 2108 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2109 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2110 /* map those to global */ 2111 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2112 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2113 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2114 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2115 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2117 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2118 2119 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2120 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2121 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2122 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2123 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2124 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2125 } else { 2126 B = *matout; 2127 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2128 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2129 } 2130 2131 /* copy over the A part */ 2132 array = Aloc->a; 2133 row = A->rmap->rstart; 2134 for (i=0; i<ma; i++) { 2135 ncol = ai[i+1]-ai[i]; 2136 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2137 row++; 2138 array += ncol; aj += ncol; 2139 } 2140 aj = Aloc->j; 2141 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2142 2143 /* copy over the B part */ 2144 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2145 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2146 array = Bloc->a; 2147 row = A->rmap->rstart; 2148 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2149 cols_tmp = cols; 2150 for (i=0; i<mb; i++) { 2151 ncol = bi[i+1]-bi[i]; 2152 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2153 row++; 2154 array += ncol; cols_tmp += ncol; 2155 } 2156 ierr = PetscFree(cols);CHKERRQ(ierr); 2157 2158 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2160 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2161 *matout = B; 2162 } else { 2163 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2170 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2171 { 2172 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2173 Mat a = aij->A,b = aij->B; 2174 PetscErrorCode ierr; 2175 PetscInt s1,s2,s3; 2176 2177 PetscFunctionBegin; 2178 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2179 if (rr) { 2180 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2181 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2182 /* Overlap communication with computation. 
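         The scatter of rr into the ghosted vector lvec is started here; the left scaling of B
         and the scaling of the diagonal block A proceed while the messages are in flight, and
         the VecScatterEnd() below completes the communication only when the off-diagonal block
         needs the ghosted values of rr.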
*/ 2183 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2184 } 2185 if (ll) { 2186 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2187 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2188 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2189 } 2190 /* scale the diagonal block */ 2191 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2192 2193 if (rr) { 2194 /* Do a scatter end and then right scale the off-diagonal block */ 2195 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2196 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2197 } 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2203 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2204 { 2205 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2206 PetscErrorCode ierr; 2207 2208 PetscFunctionBegin; 2209 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatEqual_MPIAIJ" 2215 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2216 { 2217 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2218 Mat a,b,c,d; 2219 PetscBool flg; 2220 PetscErrorCode ierr; 2221 2222 PetscFunctionBegin; 2223 a = matA->A; b = matA->B; 2224 c = matB->A; d = matB->B; 2225 2226 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2227 if (flg) { 2228 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2229 } 2230 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 #undef __FUNCT__ 2235 #define __FUNCT__ "MatCopy_MPIAIJ" 2236 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2237 { 2238 PetscErrorCode ierr; 2239 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2240 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2241 2242 PetscFunctionBegin; 2243 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2244 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2245 /* because of the column compression in the off-processor part of the matrix a->B, 2246 the number of columns in a->B and b->B may be different, hence we cannot call 2247 the MatCopy() directly on the two parts. 
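   (For example, if a->B references only global columns {3,7} while b->B references {3,5,7},
   the compressed blocks have 2 and 3 local columns respectively, even though the nonzero
   pattern of A is a subset of that of B.)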
If need be, we can provide a more 2248 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2249 then copying the submatrices */ 2250 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2251 } else { 2252 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2253 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2254 } 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatSetUp_MPIAIJ" 2260 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2261 { 2262 PetscErrorCode ierr; 2263 2264 PetscFunctionBegin; 2265 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2271 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2272 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2273 { 2274 PetscInt i,m=Y->rmap->N; 2275 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2276 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2277 const PetscInt *xi = x->i,*yi = y->i; 2278 2279 PetscFunctionBegin; 2280 /* Set the number of nonzeros in the new matrix */ 2281 for (i=0; i<m; i++) { 2282 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2283 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2284 nnz[i] = 0; 2285 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2286 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2287 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2288 nnz[i]++; 2289 } 2290 for (; k<nzy; k++) nnz[i]++; 2291 } 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatAXPY_MPIAIJ" 2297 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2298 { 2299 PetscErrorCode ierr; 2300 PetscInt i; 2301 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2302 PetscBLASInt bnz,one=1; 2303 Mat_SeqAIJ *x,*y; 2304 2305 PetscFunctionBegin; 2306 if (str == SAME_NONZERO_PATTERN) { 2307 PetscScalar alpha = a; 2308 x = (Mat_SeqAIJ*)xx->A->data; 2309 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2310 y = (Mat_SeqAIJ*)yy->A->data; 2311 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2312 x = (Mat_SeqAIJ*)xx->B->data; 2313 y = (Mat_SeqAIJ*)yy->B->data; 2314 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2315 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2316 } else if (str == SUBSET_NONZERO_PATTERN) { 2317 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2318 2319 x = (Mat_SeqAIJ*)xx->B->data; 2320 y = (Mat_SeqAIJ*)yy->B->data; 2321 if (y->xtoy && y->XtoY != xx->B) { 2322 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2323 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2324 } 2325 if (!y->xtoy) { /* get xtoy */ 2326 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2327 y->XtoY = xx->B; 2328 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2329 } 2330 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2331 } else { 2332 Mat B; 2333 PetscInt *nnz_d,*nnz_o; 2334 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2335 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2336 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2337 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2338 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2339 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2340 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2342 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2343 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2344 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2345 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2347 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2348 } 2349 PetscFunctionReturn(0); 2350 } 2351 2352 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2353 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "MatConjugate_MPIAIJ" 2356 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2357 { 2358 #if defined(PETSC_USE_COMPLEX) 2359 PetscErrorCode ierr; 2360 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2361 2362 PetscFunctionBegin; 2363 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2364 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2365 #else 2366 PetscFunctionBegin; 2367 #endif 2368 PetscFunctionReturn(0); 2369 } 2370 2371 #undef __FUNCT__ 2372 #define __FUNCT__ "MatRealPart_MPIAIJ" 2373 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2374 { 2375 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2376 PetscErrorCode ierr; 2377 2378 PetscFunctionBegin; 2379 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2380 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2386 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2387 { 2388 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2393 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #if defined(PETSC_HAVE_PBGL) 2398 2399 #include <boost/parallel/mpi/bsp_process_group.hpp> 2400 #include <boost/graph/distributed/ilu_default_graph.hpp> 2401 #include <boost/graph/distributed/ilu_0_block.hpp> 2402 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2403 #include <boost/graph/distributed/petsc/interface.hpp> 2404 #include <boost/multi_array.hpp> 2405 #include <boost/parallel/distributed_property_map.hpp> 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2409 /* 2410 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2411 */ 2412 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2413 { 2414 namespace petsc = boost::distributed::petsc; 2415 2416 namespace graph_dist = boost::graph::distributed; 2417 using boost::graph::distributed::ilu_default::process_group_type; 2418 using boost::graph::ilu_permuted; 2419 2420 PetscBool row_identity, col_identity; 2421 PetscContainer c; 2422 PetscInt m, n, M, N; 2423 PetscErrorCode ierr; 2424 2425 PetscFunctionBegin; 2426 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2427 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2428 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2429 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2430 2431 process_group_type pg; 2432 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2433 lgraph_type
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2434 lgraph_type& level_graph = *lgraph_p; 2435 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2436 2437 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2438 ilu_permuted(level_graph); 2439 2440 /* put together the new matrix */ 2441 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2442 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2443 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2444 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2445 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2446 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2447 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2449 2450 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr); 2451 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2452 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2453 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 #undef __FUNCT__ 2458 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2459 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2460 { 2461 PetscFunctionBegin; 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatSolve_MPIAIJ" 2467 /* 2468 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2469 */ 2470 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2471 { 2472 namespace graph_dist = boost::graph::distributed; 2473 2474 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2475 lgraph_type *lgraph_p; 2476 PetscContainer c; 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2481 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2482 ierr = VecCopy(b, x);CHKERRQ(ierr); 2483 2484 PetscScalar *array_x; 2485 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2486 PetscInt sx; 2487 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2488 2489 PetscScalar *array_b; 2490 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2491 PetscInt sb; 2492 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2493 2494 lgraph_type& level_graph = *lgraph_p; 2495 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2496 2497 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2498 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2499 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2500 2501 typedef boost::iterator_property_map<array_ref_type::iterator, 2502 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2503 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2504 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2505 2506 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2507 PetscFunctionReturn(0); 2508 } 2509 #endif 2510 2511 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2512 PetscInt nzlocal,nsends,nrecvs; 2513 PetscMPIInt *send_rank,*recv_rank; 2514 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2515 PetscScalar *sbuf_a,**rbuf_a; 2516 PetscErrorCode (*Destroy)(Mat); 2517 } Mat_Redundant; 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2521
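/*
   Mat_Redundant lifecycle, roughly: MatGetRedundantMatrix_MPIAIJ() below stashes its
   communication buffers in a Mat_Redundant struct, wraps that in a PetscContainer composed
   on the redundant matrix under the key "Mat_Redundant", and swaps the matrix's destroy
   routine for MatDestroy_MatRedundant(). A later MAT_REUSE_MATRIX call recovers the buffers
   from the container; at destroy time the callback below frees them and then chains to the
   original destroy routine.
*/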
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2522 { 2523 PetscErrorCode ierr; 2524 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2525 PetscInt i; 2526 2527 PetscFunctionBegin; 2528 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2529 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2530 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2531 for (i=0; i<redund->nrecvs; i++) { 2532 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2533 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2534 } 2535 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2536 ierr = PetscFree(redund);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatDestroy_MatRedundant" 2542 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2543 { 2544 PetscErrorCode ierr; 2545 PetscContainer container; 2546 Mat_Redundant *redund=NULL; 2547 2548 PetscFunctionBegin; 2549 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2550 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2551 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2552 2553 A->ops->destroy = redund->Destroy; 2554 2555 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2556 if (A->ops->destroy) { 2557 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2558 } 2559 PetscFunctionReturn(0); 2560 } 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2564 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2565 { 2566 PetscMPIInt rank,size; 2567 MPI_Comm comm; 2568 PetscErrorCode ierr; 2569 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2570 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2571 PetscInt *rowrange = mat->rmap->range; 2572 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2573 Mat A = aij->A,B=aij->B,C=*matredundant; 2574 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2575 PetscScalar *sbuf_a; 2576 PetscInt nzlocal=a->nz+b->nz; 2577 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2578 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2579 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2580 MatScalar *aworkA,*aworkB; 2581 PetscScalar *vals; 2582 PetscMPIInt tag1,tag2,tag3,imdex; 2583 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2584 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2585 MPI_Status recv_status,*send_status; 2586 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2587 PetscInt **rbuf_j=NULL; 2588 PetscScalar **rbuf_a=NULL; 2589 Mat_Redundant *redund =NULL; 2590 PetscContainer container; 2591 2592 PetscFunctionBegin; 2593 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2594 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2595 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2596 2597 if (reuse == MAT_REUSE_MATRIX) { 2598 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2599 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2600 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2601 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong local size"); 2602 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2603 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2604 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2605 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2606 2607 nsends = redund->nsends; 2608 nrecvs = redund->nrecvs; 2609 send_rank = redund->send_rank; 2610 recv_rank = redund->recv_rank; 2611 sbuf_nz = redund->sbuf_nz; 2612 rbuf_nz = redund->rbuf_nz; 2613 sbuf_j = redund->sbuf_j; 2614 sbuf_a = redund->sbuf_a; 2615 rbuf_j = redund->rbuf_j; 2616 rbuf_a = redund->rbuf_a; 2617 } 2618 2619 if (reuse == MAT_INITIAL_MATRIX) { 2620 PetscMPIInt subrank,subsize; 2621 PetscInt nleftover,np_subcomm; 2622 /* get the destination processors' id send_rank, nsends and nrecvs */ 2623 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2624 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2625 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2626 2627 np_subcomm = size/nsubcomm; 2628 nleftover = size - nsubcomm*np_subcomm; 2629 2630 nsends = 0; nrecvs = 0; 2631 for (i=0; i<size; i++) { /* i=rank*/ 2632 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2633 send_rank[nsends] = i; nsends++; 2634 recv_rank[nrecvs++] = i; 2635 } 2636 } 2637 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2638 i = size-nleftover-1; 2639 j = 0; 2640 while (j < nsubcomm - nleftover) { 2641 send_rank[nsends++] = i; 2642 i--; j++; 2643 } 2644 } 2645 2646 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2647 for (i=0; i<nleftover; i++) { 2648 recv_rank[nrecvs++] = size-nleftover+i; 2649 } 2650 } 2651 2652 /* allocate sbuf_j, sbuf_a */ 2653 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2654 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2655 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2656 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2657 2658 /* copy mat's local entries into the buffers */ 2659 if (reuse == MAT_INITIAL_MATRIX) { 2660 rownz_max = 0; 2661 rptr = sbuf_j; 2662 cols = sbuf_j + rend-rstart + 1; 2663 vals = sbuf_a; 2664 rptr[0] = 0; 2665 for (i=0; i<rend-rstart; i++) { 2666 row = i + rstart; 2667 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2668 ncols = nzA + nzB; 2669 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2670 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2671 /* load the column indices for this row into cols */ 2672 lwrite = 0; 2673 for (l=0; l<nzB; l++) { 2674 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2675 vals[lwrite] = aworkB[l]; 2676 cols[lwrite++] = ctmp; 2677 } 2678 } 2679 for (l=0; l<nzA; l++) { 2680 vals[lwrite] = aworkA[l]; 2681 cols[lwrite++] = cstart + cworkA[l]; 2682 } 2683 for (l=0; l<nzB; l++) { 2684 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2685 vals[lwrite] = aworkB[l]; 2686 cols[lwrite++] = ctmp; 2687 } 2688 } 2689 vals += ncols; 2690 cols += ncols; 2691 rptr[i+1] = rptr[i] + ncols; 2692 if (rownz_max < ncols) rownz_max = ncols; 2693 } 2694 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz); 2695 } else { /* only copy matrix values into sbuf_a */ 2696 rptr = sbuf_j; 2697 vals = sbuf_a; 2698 rptr[0] = 0; 2699 for (i=0;
i<rend-rstart; i++) { 2700 row = i + rstart; 2701 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2702 ncols = nzA + nzB; 2703 cworkB = b->j + b->i[i]; 2704 aworkA = a->a + a->i[i]; 2705 aworkB = b->a + b->i[i]; 2706 lwrite = 0; 2707 for (l=0; l<nzB; l++) { 2708 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2709 } 2710 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2711 for (l=0; l<nzB; l++) { 2712 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2713 } 2714 vals += ncols; 2715 rptr[i+1] = rptr[i] + ncols; 2716 } 2717 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2718 2719 /* send nzlocal to others, and recv other's nzlocal */ 2720 /*--------------------------------------------------*/ 2721 if (reuse == MAT_INITIAL_MATRIX) { 2722 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2723 2724 s_waits2 = s_waits3 + nsends; 2725 s_waits1 = s_waits2 + nsends; 2726 r_waits1 = s_waits1 + nsends; 2727 r_waits2 = r_waits1 + nrecvs; 2728 r_waits3 = r_waits2 + nrecvs; 2729 } else { 2730 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2731 2732 r_waits3 = s_waits3 + nsends; 2733 } 2734 2735 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2736 if (reuse == MAT_INITIAL_MATRIX) { 2737 /* get new tags to keep the communication clean */ 2738 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2739 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2740 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2741 2742 /* post receives of other's nzlocal */ 2743 for (i=0; i<nrecvs; i++) { 2744 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2745 } 2746 /* send nzlocal to others */ 2747 for (i=0; i<nsends; i++) { 2748 sbuf_nz[i] = nzlocal; 2749 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2750 } 2751 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2752 count = nrecvs; 2753 while (count) { 2754 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2755 2756 recv_rank[imdex] = recv_status.MPI_SOURCE; 2757 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2758 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2759 2760 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2761 2762 rbuf_nz[imdex] += i + 2; 2763 2764 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2765 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2766 count--; 2767 } 2768 /* wait on sends of nzlocal */ 2769 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2770 /* send mat->i,j to others, and recv from other's */ 2771 /*------------------------------------------------*/ 2772 for (i=0; i<nsends; i++) { 2773 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2774 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2775 } 2776 /* wait on receives of mat->i,j */ 2777 /*------------------------------*/ 2778 count = nrecvs; 2779 while (count) { 2780 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2781 if (recv_rank[imdex] != 
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2782 count--; 2783 } 2784 /* wait on sends of mat->i,j */ 2785 /*---------------------------*/ 2786 if (nsends) { 2787 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2788 } 2789 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2790 2791 /* post receives, send and receive mat->a */ 2792 /*----------------------------------------*/ 2793 for (imdex=0; imdex<nrecvs; imdex++) { 2794 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2795 } 2796 for (i=0; i<nsends; i++) { 2797 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2798 } 2799 count = nrecvs; 2800 while (count) { 2801 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2802 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2803 count--; 2804 } 2805 if (nsends) { 2806 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2807 } 2808 2809 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2810 2811 /* create redundant matrix */ 2812 /*-------------------------*/ 2813 if (reuse == MAT_INITIAL_MATRIX) { 2814 /* compute rownz_max for preallocation */ 2815 for (imdex=0; imdex<nrecvs; imdex++) { 2816 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2817 rptr = rbuf_j[imdex]; 2818 for (i=0; i<j; i++) { 2819 ncols = rptr[i+1] - rptr[i]; 2820 if (rownz_max < ncols) rownz_max = ncols; 2821 } 2822 } 2823 2824 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2825 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2826 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2827 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2828 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2829 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2830 } else { 2831 C = *matredundant; 2832 } 2833 2834 /* insert local matrix entries */ 2835 rptr = sbuf_j; 2836 cols = sbuf_j + rend-rstart + 1; 2837 vals = sbuf_a; 2838 for (i=0; i<rend-rstart; i++) { 2839 row = i + rstart; 2840 ncols = rptr[i+1] - rptr[i]; 2841 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2842 vals += ncols; 2843 cols += ncols; 2844 } 2845 /* insert received matrix entries */ 2846 for (imdex=0; imdex<nrecvs; imdex++) { 2847 rstart = rowrange[recv_rank[imdex]]; 2848 rend = rowrange[recv_rank[imdex]+1]; 2849 rptr = rbuf_j[imdex]; 2850 cols = rbuf_j[imdex] + rend-rstart + 1; 2851 vals = rbuf_a[imdex]; 2852 for (i=0; i<rend-rstart; i++) { 2853 row = i + rstart; 2854 ncols = rptr[i+1] - rptr[i]; 2855 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2856 vals += ncols; 2857 cols += ncols; 2858 } 2859 } 2860 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2863 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %D != input mat size %D",M,mat->rmap->N); 2864 if (reuse == MAT_INITIAL_MATRIX) { 2865 PetscContainer container; 2866 *matredundant = C; 2867 /* create a supporting struct and attach it to C for reuse */ 2868 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2869 ierr =
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2870 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2871 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2872 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2873 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2874 2875 redund->nzlocal = nzlocal; 2876 redund->nsends = nsends; 2877 redund->nrecvs = nrecvs; 2878 redund->send_rank = send_rank; 2879 redund->recv_rank = recv_rank; 2880 redund->sbuf_nz = sbuf_nz; 2881 redund->rbuf_nz = rbuf_nz; 2882 redund->sbuf_j = sbuf_j; 2883 redund->sbuf_a = sbuf_a; 2884 redund->rbuf_j = rbuf_j; 2885 redund->rbuf_a = rbuf_a; 2886 2887 redund->Destroy = C->ops->destroy; 2888 C->ops->destroy = MatDestroy_MatRedundant; 2889 } 2890 PetscFunctionReturn(0); 2891 } 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2895 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2896 { 2897 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2898 PetscErrorCode ierr; 2899 PetscInt i,*idxb = 0; 2900 PetscScalar *va,*vb; 2901 Vec vtmp; 2902 2903 PetscFunctionBegin; 2904 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2905 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2906 if (idx) { 2907 for (i=0; i<A->rmap->n; i++) { 2908 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2909 } 2910 } 2911 2912 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2913 if (idx) { 2914 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2915 } 2916 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2917 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2918 2919 for (i=0; i<A->rmap->n; i++) { 2920 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2921 va[i] = vb[i]; 2922 if (idx) idx[i] = a->garray[idxb[i]]; 2923 } 2924 } 2925 2926 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2928 ierr = PetscFree(idxb);CHKERRQ(ierr); 2929 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2930 PetscFunctionReturn(0); 2931 } 2932 2933 #undef __FUNCT__ 2934 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2935 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2936 { 2937 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2938 PetscErrorCode ierr; 2939 PetscInt i,*idxb = 0; 2940 PetscScalar *va,*vb; 2941 Vec vtmp; 2942 2943 PetscFunctionBegin; 2944 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2945 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2946 if (idx) { 2947 for (i=0; i<A->rmap->n; i++) { 2948 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2949 } 2950 } 2951 2952 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2953 if (idx) { 2954 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2955 } 2956 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2957 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2958 2959 for (i=0; i<A->rmap->n; i++) { 2960 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2961 va[i] = vb[i]; 2962 if (idx) idx[i] = a->garray[idxb[i]]; 2963 } 2964 } 2965 2966 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2967 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2968 ierr = PetscFree(idxb);CHKERRQ(ierr); 2969 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2975 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2976 { 2977 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)
A->data; 2978 PetscInt n = A->rmap->n; 2979 PetscInt cstart = A->cmap->rstart; 2980 PetscInt *cmap = mat->garray; 2981 PetscInt *diagIdx, *offdiagIdx; 2982 Vec diagV, offdiagV; 2983 PetscScalar *a, *diagA, *offdiagA; 2984 PetscInt r; 2985 PetscErrorCode ierr; 2986 2987 PetscFunctionBegin; 2988 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2989 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2990 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2991 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2992 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2993 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2994 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2995 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2996 for (r = 0; r < n; ++r) { 2997 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2998 a[r] = diagA[r]; 2999 idx[r] = cstart + diagIdx[r]; 3000 } else { 3001 a[r] = offdiagA[r]; 3002 idx[r] = cmap[offdiagIdx[r]]; 3003 } 3004 } 3005 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3006 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3007 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3008 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3009 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3010 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3011 PetscFunctionReturn(0); 3012 } 3013 3014 #undef __FUNCT__ 3015 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3016 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3017 { 3018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3019 PetscInt n = A->rmap->n; 3020 PetscInt cstart = A->cmap->rstart; 3021 PetscInt *cmap = mat->garray; 3022 PetscInt *diagIdx, *offdiagIdx; 3023 Vec diagV, offdiagV; 3024 PetscScalar *a, *diagA, *offdiagA; 3025 PetscInt r; 3026 PetscErrorCode ierr; 3027 3028 PetscFunctionBegin; 3029 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3030 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3031 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3032 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3033 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3034 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3035 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3036 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3037 for (r = 0; r < n; ++r) { 3038 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3039 a[r] = diagA[r]; 3040 idx[r] = cstart + diagIdx[r]; 3041 } else { 3042 a[r] = offdiagA[r]; 3043 idx[r] = cmap[offdiagIdx[r]]; 3044 } 3045 } 3046 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3047 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3048 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3049 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3050 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3051 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 3055 #undef __FUNCT__ 3056 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3057 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3058 { 3059 PetscErrorCode ierr; 3060 Mat *dummy; 3061 3062 PetscFunctionBegin; 3063 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3064 *newmat = *dummy; 3065 ierr = PetscFree(dummy);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 extern PetscErrorCode
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3073 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3074 { 3075 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3076 PetscErrorCode ierr; 3077 3078 PetscFunctionBegin; 3079 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3080 PetscFunctionReturn(0); 3081 } 3082 3083 #undef __FUNCT__ 3084 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3085 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3086 { 3087 PetscErrorCode ierr; 3088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3089 3090 PetscFunctionBegin; 3091 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3092 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3093 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3094 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3095 PetscFunctionReturn(0); 3096 } 3097 3098 /* -------------------------------------------------------------------*/ 3099 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3100 MatGetRow_MPIAIJ, 3101 MatRestoreRow_MPIAIJ, 3102 MatMult_MPIAIJ, 3103 /* 4*/ MatMultAdd_MPIAIJ, 3104 MatMultTranspose_MPIAIJ, 3105 MatMultTransposeAdd_MPIAIJ, 3106 #if defined(PETSC_HAVE_PBGL) 3107 MatSolve_MPIAIJ, 3108 #else 3109 0, 3110 #endif 3111 0, 3112 0, 3113 /*10*/ 0, 3114 0, 3115 0, 3116 MatSOR_MPIAIJ, 3117 MatTranspose_MPIAIJ, 3118 /*15*/ MatGetInfo_MPIAIJ, 3119 MatEqual_MPIAIJ, 3120 MatGetDiagonal_MPIAIJ, 3121 MatDiagonalScale_MPIAIJ, 3122 MatNorm_MPIAIJ, 3123 /*20*/ MatAssemblyBegin_MPIAIJ, 3124 MatAssemblyEnd_MPIAIJ, 3125 MatSetOption_MPIAIJ, 3126 MatZeroEntries_MPIAIJ, 3127 /*24*/ MatZeroRows_MPIAIJ, 3128 0, 3129 #if defined(PETSC_HAVE_PBGL) 3130 0, 3131 #else 3132 0, 3133 #endif 3134 0, 3135 0, 3136 /*29*/ MatSetUp_MPIAIJ, 3137 #if defined(PETSC_HAVE_PBGL) 3138 0, 3139 #else 3140 0, 3141 #endif 3142 0, 3143 0, 3144 0, 3145 /*34*/ MatDuplicate_MPIAIJ, 3146 0, 3147 0, 3148 0, 3149 0, 3150 /*39*/ MatAXPY_MPIAIJ, 3151 MatGetSubMatrices_MPIAIJ, 3152 MatIncreaseOverlap_MPIAIJ, 3153 MatGetValues_MPIAIJ, 3154 MatCopy_MPIAIJ, 3155 /*44*/ MatGetRowMax_MPIAIJ, 3156 MatScale_MPIAIJ, 3157 0, 3158 0, 3159 MatZeroRowsColumns_MPIAIJ, 3160 /*49*/ MatSetRandom_MPIAIJ, 3161 0, 3162 0, 3163 0, 3164 0, 3165 /*54*/ MatFDColoringCreate_MPIAIJ, 3166 0, 3167 MatSetUnfactored_MPIAIJ, 3168 MatPermute_MPIAIJ, 3169 0, 3170 /*59*/ MatGetSubMatrix_MPIAIJ, 3171 MatDestroy_MPIAIJ, 3172 MatView_MPIAIJ, 3173 0, 3174 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3175 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3176 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3177 0, 3178 0, 3179 0, 3180 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3181 MatGetRowMinAbs_MPIAIJ, 3182 0, 3183 MatSetColoring_MPIAIJ, 3184 0, 3185 MatSetValuesAdifor_MPIAIJ, 3186 /*75*/ MatFDColoringApply_AIJ, 3187 0, 3188 0, 3189 0, 3190 MatFindZeroDiagonals_MPIAIJ, 3191 /*80*/ 0, 3192 0, 3193 0, 3194 /*83*/ MatLoad_MPIAIJ, 3195 0, 3196 0, 3197 0, 3198 0, 3199 0, 3200 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3201 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3202 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3203 MatPtAP_MPIAIJ_MPIAIJ, 3204 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3205 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3206 0, 3207 0, 3208 0, 3209 0, 3210 /*99*/ 0, 3211 0, 3212 0, 3213 MatConjugate_MPIAIJ, 3214 0, 3215 /*104*/MatSetValuesRow_MPIAIJ, 3216 MatRealPart_MPIAIJ, 3217 MatImaginaryPart_MPIAIJ, 3218 0, 3219 0, 3220 /*109*/0, 3221 MatGetRedundantMatrix_MPIAIJ, 3222 MatGetRowMin_MPIAIJ, 3223 0, 3224 0, 
3225 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3226 0, 3227 0, 3228 0, 3229 0, 3230 /*119*/0, 3231 0, 3232 0, 3233 0, 3234 MatGetMultiProcBlock_MPIAIJ, 3235 /*124*/MatFindNonzeroRows_MPIAIJ, 3236 MatGetColumnNorms_MPIAIJ, 3237 MatInvertBlockDiagonal_MPIAIJ, 3238 0, 3239 MatGetSubMatricesParallel_MPIAIJ, 3240 /*129*/0, 3241 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3242 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3243 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3244 0, 3245 /*134*/0, 3246 0, 3247 0, 3248 0, 3249 0 3250 }; 3251 3252 /* ----------------------------------------------------------------------------------------*/ 3253 3254 EXTERN_C_BEGIN 3255 #undef __FUNCT__ 3256 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3257 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3258 { 3259 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3260 PetscErrorCode ierr; 3261 3262 PetscFunctionBegin; 3263 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3264 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 EXTERN_C_END 3268 3269 EXTERN_C_BEGIN 3270 #undef __FUNCT__ 3271 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3272 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3273 { 3274 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3275 PetscErrorCode ierr; 3276 3277 PetscFunctionBegin; 3278 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3279 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3280 PetscFunctionReturn(0); 3281 } 3282 EXTERN_C_END 3283 3284 EXTERN_C_BEGIN 3285 #undef __FUNCT__ 3286 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3287 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3288 { 3289 Mat_MPIAIJ *b; 3290 PetscErrorCode ierr; 3291 PetscInt i; 3292 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3293 3294 PetscFunctionBegin; 3295 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3296 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3297 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3298 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3299 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3300 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3301 3302 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3303 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3304 if (d_nnz) { 3305 for (i=0; i<B->rmap->n; i++) { 3306 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3307 } 3308 } 3309 if (o_nnz) { 3310 for (i=0; i<B->rmap->n; i++) { 3311 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3312 } 3313 } 3314 b = (Mat_MPIAIJ*)B->data; 3315 3316 if (!B->preallocated) { 3317 /* Explicitly create 2 MATSEQAIJ matrices. 
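b->A holds the "diagonal" block (local rows by local columns) and b->B the "off-diagonal" block. Note that b->B is created below with the full global column width N; during assembly it is compressed to just the columns that actually contain nonzeros, with the local-to-global column map recorded in b->garray.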
*/ 3318 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3319 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3320 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3321 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3322 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3323 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3324 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3325 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3326 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3327 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3328 } 3329 3330 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3331 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3332 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3333 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3334 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3335 B->preallocated = PETSC_TRUE; 3336 PetscFunctionReturn(0); 3337 } 3338 EXTERN_C_END 3339 3340 #undef __FUNCT__ 3341 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3342 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3343 { 3344 Mat mat; 3345 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3346 PetscErrorCode ierr; 3347 3348 PetscFunctionBegin; 3349 *newmat = 0; 3350 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3351 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3352 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3353 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3354 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3355 a = (Mat_MPIAIJ*)mat->data; 3356 3357 mat->factortype = matin->factortype; 3358 mat->rmap->bs = matin->rmap->bs; 3359 mat->cmap->bs = matin->cmap->bs; 3360 mat->assembled = PETSC_TRUE; 3361 mat->insertmode = NOT_SET_VALUES; 3362 mat->preallocated = PETSC_TRUE; 3363 3364 a->size = oldmat->size; 3365 a->rank = oldmat->rank; 3366 a->donotstash = oldmat->donotstash; 3367 a->roworiented = oldmat->roworiented; 3368 a->rowindices = 0; 3369 a->rowvalues = 0; 3370 a->getrowactive = PETSC_FALSE; 3371 3372 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3373 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3374 3375 if (oldmat->colmap) { 3376 #if defined(PETSC_USE_CTABLE) 3377 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3378 #else 3379 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3380 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3381 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3382 #endif 3383 } else a->colmap = 0; 3384 if (oldmat->garray) { 3385 PetscInt len; 3386 len = oldmat->B->cmap->n; 3387 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3388 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3389 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3390 } else a->garray = 0; 3391 3392 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3393 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3394 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3395 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3396 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3397 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3398 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3399 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3400 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3401 *newmat = mat; 3402 PetscFunctionReturn(0); 3403 } 3404 3405 3406 3407 #undef __FUNCT__ 3408 #define __FUNCT__ "MatLoad_MPIAIJ" 3409 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3410 { 3411 PetscScalar *vals,*svals; 3412 MPI_Comm comm; 3413 PetscErrorCode ierr; 3414 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3415 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3416 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3417 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3418 PetscInt cend,cstart,n,*rowners,sizesset=1; 3419 int fd; 3420 PetscInt bs = 1; 3421 3422 PetscFunctionBegin; 3423 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3424 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3425 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3426 if (!rank) { 3427 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3428 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3429 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 3430 } 3431 3432 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 3433 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3434 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3435 3436 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3437 3438 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3439 M = header[1]; N = header[2]; 3440 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3441 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3442 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3443 3444 /* If global sizes are set, check if they are consistent with that given in the file */ 3445 if (sizesset) { 3446 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3447 } 3448 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows); 3449 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3450 3451 /* determine ownership of all (block) rows */ 3452 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3453 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3454 else m = newMat->rmap->n; /* Set by user */ 3455 3456 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3457 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3458 3459 /* First process needs enough room for process with most rows */ 3460 if (!rank) { 3461 mmax = rowners[1]; 3462 for (i=2;
i<=size; i++) { 3463 mmax = PetscMax(mmax, rowners[i]); 3464 } 3465 } else mmax = m; 3466 3467 rowners[0] = 0; 3468 for (i=2; i<=size; i++) { 3469 rowners[i] += rowners[i-1]; 3470 } 3471 rstart = rowners[rank]; 3472 rend = rowners[rank+1]; 3473 3474 /* distribute row lengths to all processors */ 3475 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3476 if (!rank) { 3477 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3478 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3479 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3480 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3481 for (j=0; j<m; j++) { 3482 procsnz[0] += ourlens[j]; 3483 } 3484 for (i=1; i<size; i++) { 3485 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3486 /* calculate the number of nonzeros on each processor */ 3487 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3488 procsnz[i] += rowlengths[j]; 3489 } 3490 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3491 } 3492 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3493 } else { 3494 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3495 } 3496 3497 if (!rank) { 3498 /* determine max buffer needed and allocate it */ 3499 maxnz = 0; 3500 for (i=0; i<size; i++) { 3501 maxnz = PetscMax(maxnz,procsnz[i]); 3502 } 3503 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3504 3505 /* read in my part of the matrix column indices */ 3506 nz = procsnz[0]; 3507 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3508 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3509 3510 /* read in everyone else's and ship it off */ 3511 for (i=1; i<size; i++) { 3512 nz = procsnz[i]; 3513 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3514 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3515 } 3516 ierr = PetscFree(cols);CHKERRQ(ierr); 3517 } else { 3518 /* determine buffer space needed for message */ 3519 nz = 0; 3520 for (i=0; i<m; i++) { 3521 nz += ourlens[i]; 3522 } 3523 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3524 3525 /* receive message of column indices */ 3526 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3527 } 3528 3529 /* determine column ownership if matrix is not square */ 3530 if (N != M) { 3531 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3532 else n = newMat->cmap->n; 3533 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3534 cstart = cend - n; 3535 } else { 3536 cstart = rstart; 3537 cend = rend; 3538 n = cend - cstart; 3539 } 3540 3541 /* loop over local rows, determining number of off diagonal entries */ 3542 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3543 jj = 0; 3544 for (i=0; i<m; i++) { 3545 for (j=0; j<ourlens[i]; j++) { 3546 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3547 jj++; 3548 } 3549 } 3550 3551 for (i=0; i<m; i++) { 3552 ourlens[i] -= offlens[i]; 3553 } 3554 if (!sizesset) { 3555 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3556 } 3557 3558 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3559 3560 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3561 3562 for (i=0; i<m; i++) { 3563 ourlens[i] += offlens[i]; 3564 } 3565 3566 if (!rank) { 3567 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3568 3569 /* read in my part of the matrix
numerical values */ 3570 nz = procsnz[0]; 3571 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3572 3573 /* insert into matrix */ 3574 jj = rstart; 3575 smycols = mycols; 3576 svals = vals; 3577 for (i=0; i<m; i++) { 3578 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3579 smycols += ourlens[i]; 3580 svals += ourlens[i]; 3581 jj++; 3582 } 3583 3584 /* read in other processors and ship out */ 3585 for (i=1; i<size; i++) { 3586 nz = procsnz[i]; 3587 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3588 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3589 } 3590 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3591 } else { 3592 /* receive numeric values */ 3593 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3594 3595 /* receive message of values */ 3596 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3597 3598 /* insert into matrix */ 3599 jj = rstart; 3600 smycols = mycols; 3601 svals = vals; 3602 for (i=0; i<m; i++) { 3603 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3604 smycols += ourlens[i]; 3605 svals += ourlens[i]; 3606 jj++; 3607 } 3608 } 3609 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3610 ierr = PetscFree(vals);CHKERRQ(ierr); 3611 ierr = PetscFree(mycols);CHKERRQ(ierr); 3612 ierr = PetscFree(rowners);CHKERRQ(ierr); 3613 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3614 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3615 PetscFunctionReturn(0); 3616 } 3617 3618 #undef __FUNCT__ 3619 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3620 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3621 { 3622 PetscErrorCode ierr; 3623 IS iscol_local; 3624 PetscInt csize; 3625 3626 PetscFunctionBegin; 3627 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3628 if (call == MAT_REUSE_MATRIX) { 3629 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3630 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3631 } else { 3632 PetscInt cbs; 3633 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3634 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3635 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3636 } 3637 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3638 if (call == MAT_INITIAL_MATRIX) { 3639 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3640 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3641 } 3642 PetscFunctionReturn(0); 3643 } 3644 3645 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3646 #undef __FUNCT__ 3647 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3648 /* 3649 Not great since it makes two copies of the submatrix: first a SeqAIJ 3650 on each process, then the final result obtained by concatenating the 3651 local pieces. Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3652 3653 Note: This requires a sequential iscol with all indices.
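(The caller MatGetSubMatrix_MPIAIJ() above builds such an index set by gathering the distributed iscol onto every process with ISAllGather().)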
3654 */ 3655 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3656 { 3657 PetscErrorCode ierr; 3658 PetscMPIInt rank,size; 3659 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3660 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3661 PetscBool allcolumns, colflag; 3662 Mat M,Mreuse; 3663 MatScalar *vwork,*aa; 3664 MPI_Comm comm; 3665 Mat_SeqAIJ *aij; 3666 3667 PetscFunctionBegin; 3668 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3669 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3670 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3671 3672 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3673 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3674 if (colflag && ncol == mat->cmap->N) { 3675 allcolumns = PETSC_TRUE; 3676 } else { 3677 allcolumns = PETSC_FALSE; 3678 } 3679 if (call == MAT_REUSE_MATRIX) { 3680 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3681 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3682 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3683 } else { 3684 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3685 } 3686 3687 /* 3688 m - number of local rows 3689 n - number of columns (same on all processors) 3690 rstart - first row in new global matrix generated 3691 */ 3692 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3693 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3694 if (call == MAT_INITIAL_MATRIX) { 3695 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3696 ii = aij->i; 3697 jj = aij->j; 3698 3699 /* 3700 Determine the number of non-zeros in the diagonal and off-diagonal 3701 portions of the matrix in order to do correct preallocation 3702 */ 3703 3704 /* first get start and end of "diagonal" columns */ 3705 if (csize == PETSC_DECIDE) { 3706 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3707 if (mglobal == n) { /* square matrix */ 3708 nlocal = m; 3709 } else { 3710 nlocal = n/size + ((n % size) > rank); 3711 } 3712 } else { 3713 nlocal = csize; 3714 } 3715 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3716 rstart = rend - nlocal; 3717 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3718 3719 /* next, compute all the lengths */ 3720 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3721 olens = dlens + m; 3722 for (i=0; i<m; i++) { 3723 jend = ii[i+1] - ii[i]; 3724 olen = 0; 3725 dlen = 0; 3726 for (j=0; j<jend; j++) { 3727 if (*jj < rstart || *jj >= rend) olen++; 3728 else dlen++; 3729 jj++; 3730 } 3731 olens[i] = olen; 3732 dlens[i] = dlen; 3733 } 3734 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3735 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3736 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3737 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3738 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3739 ierr = PetscFree(dlens);CHKERRQ(ierr); 3740 } else { 3741 PetscInt ml,nl; 3742 3743 M = *newmat; 3744 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3745 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3746 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3747 /* 
3748 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3749 rather than the slower MatSetValues(). 3750 */ 3751 M->was_assembled = PETSC_TRUE; 3752 M->assembled = PETSC_FALSE; 3753 } 3754 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3755 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3756 ii = aij->i; 3757 jj = aij->j; 3758 aa = aij->a; 3759 for (i=0; i<m; i++) { 3760 row = rstart + i; 3761 nz = ii[i+1] - ii[i]; 3762 cwork = jj; jj += nz; 3763 vwork = aa; aa += nz; 3764 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3765 } 3766 3767 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3768 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3769 *newmat = M; 3770 3771 /* save submatrix used in processor for next request */ 3772 if (call == MAT_INITIAL_MATRIX) { 3773 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3774 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3775 } 3776 PetscFunctionReturn(0); 3777 } 3778 3779 EXTERN_C_BEGIN 3780 #undef __FUNCT__ 3781 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3782 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3783 { 3784 PetscInt m,cstart, cend,j,nnz,i,d; 3785 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3786 const PetscInt *JJ; 3787 PetscScalar *values; 3788 PetscErrorCode ierr; 3789 3790 PetscFunctionBegin; 3791 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3792 3793 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3794 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3795 m = B->rmap->n; 3796 cstart = B->cmap->rstart; 3797 cend = B->cmap->rend; 3798 rstart = B->rmap->rstart; 3799 3800 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3801 3802 #if defined(PETSC_USE_DEBUG) 3803 for (i=0; i<m; i++) { 3804 nnz = Ii[i+1]- Ii[i]; 3805 JJ = J + Ii[i]; 3806 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3807 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3808 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3809 } 3810 #endif 3811 3812 for (i=0; i<m; i++) { 3813 nnz = Ii[i+1]- Ii[i]; 3814 JJ = J + Ii[i]; 3815 nnz_max = PetscMax(nnz_max,nnz); 3816 d = 0; 3817 for (j=0; j<nnz; j++) { 3818 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3819 } 3820 d_nnz[i] = d; 3821 o_nnz[i] = nnz - d; 3822 } 3823 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3824 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3825 3826 if (v) values = (PetscScalar*)v; 3827 else { 3828 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3829 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3830 } 3831 3832 for (i=0; i<m; i++) { 3833 ii = i + rstart; 3834 nnz = Ii[i+1]- Ii[i]; 3835 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3836 } 3837 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3838 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3839 3840 if (!v) { 3841 ierr = PetscFree(values);CHKERRQ(ierr); 3842 } 3843 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3844 PetscFunctionReturn(0); 3845 } 3846 EXTERN_C_END 3847 3848 #undef __FUNCT__ 3849 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3850 /*@ 3851 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3852 (the default parallel PETSc format). 3853 3854 Collective on MPI_Comm 3855 3856 Input Parameters: 3857 + B - the matrix 3858 . i - the indices into j for the start of each local row (starts with zero) 3859 . j - the column indices for each local row (starts with zero) 3860 - v - optional values in the matrix 3861 3862 Level: developer 3863 3864 Notes: 3865 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3866 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3867 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3868 3869 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3870 3871 The format which is used for the sparse matrix input is equivalent to a 3872 row-major ordering, i.e. for the following matrix, the input data expected is 3873 as shown: 3874 3875 1 0 0 3876 2 0 3 P0 3877 ------- 3878 4 5 6 P1 3879 3880 Process0 [P0]: rows_owned=[0,1] 3881 i = {0,1,3} [size = nrow+1 = 2+1] 3882 j = {0,0,2} [size = nz = 3] 3883 v = {1,2,3} [size = nz = 3] 3884 3885 Process1 [P1]: rows_owned=[2] 3886 i = {0,3} [size = nrow+1 = 1+1] 3887 j = {0,1,2} [size = nz = 3] 3888 v = {4,5,6} [size = nz = 3] 3889 3890 .keywords: matrix, aij, compressed row, sparse, parallel 3891 3892 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3893 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3894 @*/ 3895 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3896 { 3897 PetscErrorCode ierr; 3898 3899 PetscFunctionBegin; 3900 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3901 PetscFunctionReturn(0); 3902 } 3903 3904 #undef __FUNCT__ 3905 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3906 /*@C 3907 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3908 (the default parallel PETSc format). For good matrix assembly performance 3909 the user should preallocate the matrix storage by setting the parameters 3910 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3911 performance can be increased by more than a factor of 50. 3912 3913 Collective on MPI_Comm 3914 3915 Input Parameters: 3916 + A - the matrix 3917 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3918 (same value is used for all local rows) 3919 . d_nnz - array containing the number of nonzeros in the various rows of the 3920 DIAGONAL portion of the local submatrix (possibly different for each row) 3921 or NULL, if d_nz is used to specify the nonzero structure. 3922 The size of this array is equal to the number of local rows, i.e. 'm'.
3923 For matrices that will be factored, you must leave room for (and set) 3924 the diagonal entry even if it is zero. 3925 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3926 submatrix (same value is used for all local rows). 3927 - o_nnz - array containing the number of nonzeros in the various rows of the 3928 OFF-DIAGONAL portion of the local submatrix (possibly different for 3929 each row) or NULL, if o_nz is used to specify the nonzero 3930 structure. The size of this array is equal to the number 3931 of local rows, i.e. 'm'. 3932 3933 If the *_nnz parameter is given then the *_nz parameter is ignored. 3934 3935 The AIJ format (also called the Yale sparse matrix format or 3936 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3937 storage. The stored row and column indices begin with zero. 3938 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3939 3940 The parallel matrix is partitioned such that the first m0 rows belong to 3941 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3942 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 3943 3944 The DIAGONAL portion of the local submatrix of a processor can be defined 3945 as the submatrix which is obtained by extracting the part corresponding to 3946 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3947 first row that belongs to the processor, r2 is the last row belonging to 3948 this processor, and c1-c2 is the range of indices of the local part of a 3949 vector suitable for applying the matrix to. This is an mxn matrix. In the 3950 common case of a square matrix, the row and column ranges are the same and 3951 the DIAGONAL part is also square. The remaining portion of the local 3952 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3953 3954 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3955 3956 You can call MatGetInfo() to get information on how effective the preallocation was; 3957 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3958 You can also run with the option -info and look for messages with the string 3959 malloc in them to see if additional memory allocation was needed. 3960 3961 Example usage: 3962 3963 Consider the following 8x8 matrix with 34 non-zero values, that is 3964 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3965 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3966 as follows: 3967 3968 .vb 3969 1 2 0 | 0 3 0 | 0 4 3970 Proc0 0 5 6 | 7 0 0 | 8 0 3971 9 0 10 | 11 0 0 | 12 0 3972 ------------------------------------- 3973 13 0 14 | 15 16 17 | 0 0 3974 Proc1 0 18 0 | 19 20 21 | 0 0 3975 0 0 0 | 22 23 0 | 24 0 3976 ------------------------------------- 3977 Proc2 25 26 27 | 0 0 28 | 29 0 3978 30 0 0 | 31 32 33 | 0 34 3979 .ve 3980 3981 This can be represented as a collection of submatrices as: 3982 3983 .vb 3984 A B C 3985 D E F 3986 G H I 3987 .ve 3988 3989 Where the submatrices A,B,C are owned by proc0, D,E,F are 3990 owned by proc1, G,H,I are owned by proc2. 3991 3992 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3993 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3994 The 'M','N' parameters are 8,8, and have the same values on all procs. 3995 3996 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3997 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices 3998 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3999 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4000 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4001 matrix, and [DF] as another SeqAIJ matrix. 4002 4003 When d_nz, o_nz parameters are specified, d_nz storage elements are 4004 allocated for every row of the local diagonal submatrix, and o_nz 4005 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4006 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4007 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4008 In this case, the values of d_nz,o_nz are: 4009 .vb 4010 proc0 : d_nz = 2, o_nz = 2 4011 proc1 : d_nz = 3, o_nz = 2 4012 proc2 : d_nz = 1, o_nz = 4 4013 .ve 4014 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4015 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4016 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4017 34 values. 4018 4019 When d_nnz, o_nnz parameters are specified, the storage is specified 4020 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4021 In the above case the values for d_nnz,o_nnz are: 4022 .vb 4023 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4024 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4025 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4026 .ve 4027 Here the space allocated is the sum of all the above values, i.e. 34, and 4028 hence pre-allocation is perfect. 4029 4030 Level: intermediate 4031 4032 .keywords: matrix, aij, compressed row, sparse, parallel 4033 4034 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4035 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4036 @*/ 4037 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4038 { 4039 PetscErrorCode ierr; 4040 4041 PetscFunctionBegin; 4042 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4043 PetscValidType(B,1); 4044 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4045 PetscFunctionReturn(0); 4046 } 4047 4048 #undef __FUNCT__ 4049 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4050 /*@ 4051 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain, in standard 4052 CSR format, the local rows. 4053 4054 Collective on MPI_Comm 4055 4056 Input Parameters: 4057 + comm - MPI communicator 4058 . m - number of local rows (Cannot be PETSC_DECIDE) 4059 . n - This value should be the same as the local size used in creating the 4060 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4061 calculated if N is given) For square matrices n is almost always m. 4062 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4063 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4064 . i - row indices 4065 . j - column indices 4066 - a - matrix values 4067 4068 Output Parameter: 4069 . mat - the matrix 4070 4071 Level: intermediate 4072 4073 Notes: 4074 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4075 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4076 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
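   As a minimal calling sketch (an illustration, not taken from the PETSc examples; the communicator choice and error handling are abbreviated), process 0 of the two-process example shown below would pass its two local rows as

.vb
      PetscErrorCode ierr;
      Mat            A;
      PetscInt       i[] = {0,1,3};       /* row r occupies j[i[r]..i[r+1]-1] */
      PetscInt       j[] = {0,0,2};       /* global column indices */
      PetscScalar    a[] = {1.0,2.0,3.0};

      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve

   while process 1 would make the same call with m = 1 and its own i = {0,3}, j = {0,1,2}, a = {4,5,6}.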
4077 4078 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4079 4080 The format which is used for the sparse matrix input is equivalent to a 4081 row-major ordering, i.e. for the following matrix, the input data expected is 4082 as shown: 4083 4084 1 0 0 4085 2 0 3 P0 4086 ------- 4087 4 5 6 P1 4088 4089 Process0 [P0]: rows_owned=[0,1] 4090 i = {0,1,3} [size = nrow+1 = 2+1] 4091 j = {0,0,2} [size = nz = 3] 4092 v = {1,2,3} [size = nz = 3] 4093 4094 Process1 [P1]: rows_owned=[2] 4095 i = {0,3} [size = nrow+1 = 1+1] 4096 j = {0,1,2} [size = nz = 3] 4097 v = {4,5,6} [size = nz = 3] 4098 4099 .keywords: matrix, aij, compressed row, sparse, parallel 4100 4101 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4102 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4103 @*/ 4104 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4105 { 4106 PetscErrorCode ierr; 4107 4108 PetscFunctionBegin; 4109 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4110 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4111 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4112 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4113 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4114 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4115 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4116 PetscFunctionReturn(0); 4117 } 4118 4119 #undef __FUNCT__ 4120 #define __FUNCT__ "MatCreateAIJ" 4121 /*@C 4122 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4123 (the default parallel PETSc format). For good matrix assembly performance 4124 the user should preallocate the matrix storage by setting the parameters 4125 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4126 performance can be increased by more than a factor of 50. 4127 4128 Collective on MPI_Comm 4129 4130 Input Parameters: 4131 + comm - MPI communicator 4132 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4133 This value should be the same as the local size used in creating the 4134 y vector for the matrix-vector product y = Ax. 4135 . n - This value should be the same as the local size used in creating the 4136 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4137 calculated if N is given) For square matrices n is almost always m. 4138 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4139 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4140 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4141 (same value is used for all local rows) 4142 . d_nnz - array containing the number of nonzeros in the various rows of the 4143 DIAGONAL portion of the local submatrix (possibly different for each row) 4144 or NULL, if d_nz is used to specify the nonzero structure. 4145 The size of this array is equal to the number of local rows, i.e. 'm'. 4146 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4147 submatrix (same value is used for all local rows).
4148 - o_nnz - array containing the number of nonzeros in the various rows of the 4149 OFF-DIAGONAL portion of the local submatrix (possibly different for 4150 each row) or NULL, if o_nz is used to specify the nonzero 4151 structure. The size of this array is equal to the number 4152 of local rows, i.e. 'm'. 4153 4154 Output Parameter: 4155 . A - the matrix 4156 4157 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4158 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4159 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4160 4161 Notes: 4162 If the *_nnz parameter is given then the *_nz parameter is ignored. 4163 4164 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4165 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4166 storage requirements for this matrix. 4167 4168 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4169 processor then it must be used on all processors that share the object for 4170 that argument. 4171 4172 The user MUST specify either the local or global matrix dimensions 4173 (possibly both). 4174 4175 The parallel matrix is partitioned across processors such that the 4176 first m0 rows belong to process 0, the next m1 rows belong to 4177 process 1, the next m2 rows belong to process 2, etc., where 4178 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores 4179 values corresponding to an [m x N] submatrix. 4180 4181 The columns are logically partitioned with the n0 columns belonging 4182 to the 0th partition, the next n1 columns belonging to the next 4183 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 4184 4185 The DIAGONAL portion of the local submatrix on any given processor 4186 is the submatrix corresponding to the rows and columns m,n 4187 owned by the given processor, i.e. the diagonal matrix on 4188 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4189 etc. The remaining portion of the local submatrix [m x (N-n)] 4190 constitutes the OFF-DIAGONAL portion. The example below better 4191 illustrates this concept. 4192 4193 For a square global matrix we define each processor's diagonal portion 4194 to be its local rows and the corresponding columns (a square submatrix); 4195 each processor's off-diagonal portion encompasses the remainder of the 4196 local matrix (a rectangular submatrix). 4197 4198 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4199 4200 When calling this routine with a single process communicator, a matrix of 4201 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4202 type of communicator, use the construction mechanism: 4203 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4204 4205 By default, this format uses inodes (identical nodes) when possible. 4206 We search for consecutive rows with the same nonzero structure, thereby 4207 reusing matrix information to achieve increased efficiency. 4208 4209 Options Database Keys: 4210 + -mat_no_inode - Do not use inodes 4211 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4212 - -mat_aij_oneindex - Internally use indexing starting at 1 4213 rather than 0. Note that when calling MatSetValues(), 4214 the user still MUST index entries starting at 0! 4215 4216 4217 Example usage: 4218 4219 Consider the following 8x8 matrix with 34 non-zero values, that is 4220 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 4221 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4222 as follows: 4223 4224 .vb 4225 1 2 0 | 0 3 0 | 0 4 4226 Proc0 0 5 6 | 7 0 0 | 8 0 4227 9 0 10 | 11 0 0 | 12 0 4228 ------------------------------------- 4229 13 0 14 | 15 16 17 | 0 0 4230 Proc1 0 18 0 | 19 20 21 | 0 0 4231 0 0 0 | 22 23 0 | 24 0 4232 ------------------------------------- 4233 Proc2 25 26 27 | 0 0 28 | 29 0 4234 30 0 0 | 31 32 33 | 0 34 4235 .ve 4236 4237 This can be represented as a collection of submatrices as: 4238 4239 .vb 4240 A B C 4241 D E F 4242 G H I 4243 .ve 4244 4245 Where the submatrices A,B,C are owned by proc0, D,E,F are 4246 owned by proc1, G,H,I are owned by proc2. 4247 4248 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4249 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4250 The 'M','N' parameters are 8,8, and have the same values on all procs. 4251 4252 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4253 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4254 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4255 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4256 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4257 matrix, and [DF] as another SeqAIJ matrix. 4258 4259 When d_nz, o_nz parameters are specified, d_nz storage elements are 4260 allocated for every row of the local diagonal submatrix, and o_nz 4261 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4262 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4263 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4264 In this case, the values of d_nz,o_nz are: 4265 .vb 4266 proc0 : d_nz = 2, o_nz = 2 4267 proc1 : d_nz = 3, o_nz = 2 4268 proc2 : d_nz = 1, o_nz = 4 4269 .ve 4270 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4271 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4272 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4273 34 values. 4274 4275 When d_nnz, o_nnz parameters are specified, the storage is specified 4276 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4277 In the above case the values for d_nnz,o_nnz are: 4278 .vb 4279 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4280 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4281 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4282 .ve 4283 Here the space allocated is the sum of all the above values, i.e. 34, and 4284 hence pre-allocation is perfect.
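   As a concrete sketch of the recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() sequence for the example above (an illustration only; communicator choice and error handling abbreviated), proc1 would execute

.vb
      PetscErrorCode ierr;
      Mat            A;
      PetscInt       d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};  /* proc1's rows of the 8x8 example */

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);          /* m,n,M,N as above */
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve

   with each of the other processes passing its own local sizes and nnz arrays (the d_nz/o_nz arguments are ignored when the arrays are given).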
4285 4286 Level: intermediate 4287 4288 .keywords: matrix, aij, compressed row, sparse, parallel 4289 4290 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4291 MPIAIJ, MatCreateMPIAIJWithArrays() 4292 @*/ 4293 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4294 { 4295 PetscErrorCode ierr; 4296 PetscMPIInt size; 4297 4298 PetscFunctionBegin; 4299 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4300 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4301 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4302 if (size > 1) { 4303 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4304 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4305 } else { 4306 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4307 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4308 } 4309 PetscFunctionReturn(0); 4310 } 4311 4312 #undef __FUNCT__ 4313 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4314 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4315 { 4316 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4317 4318 PetscFunctionBegin; 4319 *Ad = a->A; 4320 *Ao = a->B; 4321 *colmap = a->garray; 4322 PetscFunctionReturn(0); 4323 } 4324 4325 #undef __FUNCT__ 4326 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4327 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4328 { 4329 PetscErrorCode ierr; 4330 PetscInt i; 4331 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4332 4333 PetscFunctionBegin; 4334 if (coloring->ctype == IS_COLORING_GLOBAL) { 4335 ISColoringValue *allcolors,*colors; 4336 ISColoring ocoloring; 4337 4338 /* set coloring for diagonal portion */ 4339 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4340 4341 /* set coloring for off-diagonal portion */ 4342 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4343 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4344 for (i=0; i<a->B->cmap->n; i++) { 4345 colors[i] = allcolors[a->garray[i]]; 4346 } 4347 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4348 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4349 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4350 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4351 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4352 ISColoringValue *colors; 4353 PetscInt *larray; 4354 ISColoring ocoloring; 4355 4356 /* set coloring for diagonal portion */ 4357 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4358 for (i=0; i<a->A->cmap->n; i++) { 4359 larray[i] = i + A->cmap->rstart; 4360 } 4361 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4362 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4363 for (i=0; i<a->A->cmap->n; i++) { 4364 colors[i] = coloring->colors[larray[i]]; 4365 } 4366 ierr = PetscFree(larray);CHKERRQ(ierr); 4367 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4368 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4369 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4370 4371 /* set coloring for off-diagonal portion */ 4372 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4373 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4374 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4375 for (i=0; i<a->B->cmap->n; i++) { 4376 colors[i] = coloring->colors[larray[i]]; 4377 } 4378 ierr = PetscFree(larray);CHKERRQ(ierr); 4379 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4380 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4381 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4382 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4383 PetscFunctionReturn(0); 4384 } 4385 4386 #undef __FUNCT__ 4387 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4388 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4389 { 4390 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4391 PetscErrorCode ierr; 4392 4393 PetscFunctionBegin; 4394 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4395 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4396 PetscFunctionReturn(0); 4397 } 4398 4399 #undef __FUNCT__ 4400 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4401 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4402 { 4403 PetscErrorCode ierr; 4404 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4405 PetscInt *indx; 4406 4407 PetscFunctionBegin; 4408 /* This routine will ONLY return MPIAIJ type matrix */ 4409 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4410 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4411 if (n == PETSC_DECIDE) { 4412 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4413 } 4414 /* Check sum(n) = N */ 4415 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4416 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4417 4418 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4419 rstart -= m; 4420 4421 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4422 for (i=0; i<m; i++) { 4423 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4424 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4425 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4426 } 4427 4428 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4429 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4430 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4431 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4432 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4433 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4434 PetscFunctionReturn(0); 4435 } 4436 4437 #undef __FUNCT__ 4438 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4439 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4440 { 4441 PetscErrorCode ierr; 4442 PetscInt m,N,i,rstart,nnz,Ii; 4443 PetscInt *indx; 4444 PetscScalar *values; 4445 4446 PetscFunctionBegin; 4447 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4448 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4449 for (i=0; i<m; i++) { 4450 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4451 Ii = i + rstart; 4452 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4453 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
/*@
      MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator on which the parallel matrix will live
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH processor MUST be the same.

@*/
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
*/ 4514 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4515 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4516 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4517 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4518 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4519 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4520 for (i=0; i<m; i++) { 4521 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4522 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4523 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4524 } 4525 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4526 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4527 4528 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4529 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4530 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4531 sprintf(name,"%s.%d",outfile,rank); 4532 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4533 ierr = PetscFree(name);CHKERRQ(ierr); 4534 ierr = MatView(B,out);CHKERRQ(ierr); 4535 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4536 ierr = MatDestroy(&B);CHKERRQ(ierr); 4537 PetscFunctionReturn(0); 4538 } 4539 4540 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4541 #undef __FUNCT__ 4542 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4543 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4544 { 4545 PetscErrorCode ierr; 4546 Mat_Merge_SeqsToMPI *merge; 4547 PetscContainer container; 4548 4549 PetscFunctionBegin; 4550 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4551 if (container) { 4552 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4557 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4558 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4559 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4560 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4561 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4562 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4563 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4564 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4565 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4566 ierr = PetscFree(merge);CHKERRQ(ierr); 4567 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4568 } 4569 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4570 PetscFunctionReturn(0); 4571 } 4572 4573 #include <../src/mat/utils/freespace.h> 4574 #include <petscbt.h> 4575 4576 #undef __FUNCT__ 4577 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4578 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4579 { 4580 PetscErrorCode ierr; 4581 MPI_Comm comm; 4582 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4583 PetscMPIInt size,rank,taga,*len_s; 4584 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4585 PetscInt proc,m; 4586 PetscInt **buf_ri,**buf_rj; 4587 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4588 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4589 MPI_Request *s_waits,*r_waits; 4590 MPI_Status *status; 4591 MatScalar *aa=a->a; 4592 MatScalar **abuf_r,*ba_i; 4593 Mat_Merge_SeqsToMPI *merge; 4594 PetscContainer container; 4595 4596 PetscFunctionBegin; 4597 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4598 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4599 4600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4601 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4602 4603 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4604 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4605 4606 bi = merge->bi; 4607 bj = merge->bj; 4608 buf_ri = merge->buf_ri; 4609 buf_rj = merge->buf_rj; 4610 4611 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4612 owners = merge->rowmap->range; 4613 len_s = merge->len_s; 4614 4615 /* send and recv matrix values */ 4616 /*-----------------------------*/ 4617 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4618 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4619 4620 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4621 for (proc=0,k=0; proc<size; proc++) { 4622 if (!len_s[proc]) continue; 4623 i = owners[proc]; 4624 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4625 k++; 4626 } 4627 4628 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4629 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4630 ierr = PetscFree(status);CHKERRQ(ierr); 4631 4632 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4633 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4634 4635 /* insert mat values of mpimat */ 4636 /*----------------------------*/ 4637 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4638 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4639 4640 for (k=0; k<merge->nrecv; k++) { 4641 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4642 nrows = *(buf_ri_k[k]); 4643 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4644 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4645 } 4646 4647 /* set values of ba */ 4648 m = merge->rowmap->n; 4649 for (i=0; i<m; i++) { 4650 arow = owners[rank] + i; 4651 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4652 bnzi = bi[i+1] - bi[i]; 4653 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4654 4655 /* add local non-zero vals of this proc's seqmat into ba */ 4656 anzi = ai[arow+1] - ai[arow]; 4657 aj = a->j + ai[arow]; 4658 aa = a->a + ai[arow]; 4659 nextaj = 0; 4660 for (j=0; nextaj<anzi; j++) { 4661 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4662 ba_i[j] += aa[nextaj++]; 4663 } 4664 } 4665 4666 /* add received vals into ba */ 4667 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4668 /* i-th row */ 4669 if (i == *nextrow[k]) { 4670 anzi = *(nextai[k]+1) - *nextai[k]; 4671 aj = buf_rj[k] + *(nextai[k]); 4672 aa = abuf_r[k] + *(nextai[k]); 4673 nextaj = 0; 4674 for (j=0; nextaj<anzi; j++) { 4675 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4676 ba_i[j] += aa[nextaj++]; 4677 } 4678 } 4679 nextrow[k]++; nextai[k]++; 4680 } 4681 } 4682 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4683 } 4684 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4685 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4686 4687 ierr = 
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4688 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4689 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4690 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4691 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4692 PetscFunctionReturn(0); 4693 } 4694 4695 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4696 4697 #undef __FUNCT__ 4698 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4699 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4700 { 4701 PetscErrorCode ierr; 4702 Mat B_mpi; 4703 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4704 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4705 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4706 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4707 PetscInt len,proc,*dnz,*onz,bs,cbs; 4708 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4709 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4710 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4711 MPI_Status *status; 4712 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4713 PetscBT lnkbt; 4714 Mat_Merge_SeqsToMPI *merge; 4715 PetscContainer container; 4716 4717 PetscFunctionBegin; 4718 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4719 4720 /* make sure it is a PETSc comm */ 4721 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4722 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4723 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4724 4725 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4726 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4727 4728 /* determine row ownership */ 4729 /*---------------------------------------------------------*/ 4730 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4731 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4732 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4733 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4734 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4735 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4736 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4737 4738 m = merge->rowmap->n; 4739 owners = merge->rowmap->range; 4740 4741 /* determine the number of messages to send, their lengths */ 4742 /*---------------------------------------------------------*/ 4743 len_s = merge->len_s; 4744 4745 len = 0; /* length of buf_si[] */ 4746 merge->nsend = 0; 4747 for (proc=0; proc<size; proc++) { 4748 len_si[proc] = 0; 4749 if (proc == rank) { 4750 len_s[proc] = 0; 4751 } else { 4752 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4753 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4754 } 4755 if (len_s[proc]) { 4756 merge->nsend++; 4757 nrows = 0; 4758 for (i=owners[proc]; i<owners[proc+1]; i++) { 4759 if (ai[i+1] > ai[i]) nrows++; 4760 } 4761 len_si[proc] = 2*(nrows+1); 4762 len += len_si[proc]; 4763 } 4764 } 4765 4766 /* determine the number and length of messages to receive for ij-structure */ 4767 /*-------------------------------------------------------------------------*/ 4768 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4769 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4770 4771 /* post the Irecv of j-structure */ 4772 
/*-------------------------------*/ 4773 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4774 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4775 4776 /* post the Isend of j-structure */ 4777 /*--------------------------------*/ 4778 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4779 4780 for (proc=0, k=0; proc<size; proc++) { 4781 if (!len_s[proc]) continue; 4782 i = owners[proc]; 4783 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4784 k++; 4785 } 4786 4787 /* receives and sends of j-structure are complete */ 4788 /*------------------------------------------------*/ 4789 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4790 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4791 4792 /* send and recv i-structure */ 4793 /*---------------------------*/ 4794 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4795 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4796 4797 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4798 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4799 for (proc=0,k=0; proc<size; proc++) { 4800 if (!len_s[proc]) continue; 4801 /* form outgoing message for i-structure: 4802 buf_si[0]: nrows to be sent 4803 [1:nrows]: row index (global) 4804 [nrows+1:2*nrows+1]: i-structure index 4805 */ 4806 /*-------------------------------------------*/ 4807 nrows = len_si[proc]/2 - 1; 4808 buf_si_i = buf_si + nrows+1; 4809 buf_si[0] = nrows; 4810 buf_si_i[0] = 0; 4811 nrows = 0; 4812 for (i=owners[proc]; i<owners[proc+1]; i++) { 4813 anzi = ai[i+1] - ai[i]; 4814 if (anzi) { 4815 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4816 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4817 nrows++; 4818 } 4819 } 4820 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4821 k++; 4822 buf_si += len_si[proc]; 4823 } 4824 4825 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4826 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4827 4828 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4829 for (i=0; i<merge->nrecv; i++) { 4830 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4831 } 4832 4833 ierr = PetscFree(len_si);CHKERRQ(ierr); 4834 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4835 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4836 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4837 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4838 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4839 ierr = PetscFree(status);CHKERRQ(ierr); 4840 4841 /* compute a local seq matrix in each processor */ 4842 /*----------------------------------------------*/ 4843 /* allocate bi array and free space for accumulating nonzero column info */ 4844 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4845 bi[0] = 0; 4846 4847 /* create and initialize a linked list */ 4848 nlnk = N+1; 4849 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4850 4851 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4852 len = ai[owners[rank+1]] - ai[owners[rank]]; 4853 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4854 4855 
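  /* The loop below merges, for each local row, the column indices owned by this
     process (from seqmat) with those received from other processes:
     PetscLLAddSorted() accumulates the sorted union in the linked list lnk, and
     the free-space list is grown (roughly doubling) whenever a merged row does
     not fit in the space remaining. */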
current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
                 matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator on which the parallel matrix will live
.    seqmat - the input sequential matrix (one per process)
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
    MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize().

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
    MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.
A_loc - the local sequential matrix generated 5114 5115 Level: developer 5116 5117 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5118 5119 @*/ 5120 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5121 { 5122 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5123 PetscErrorCode ierr; 5124 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5125 IS isrowa,iscola; 5126 Mat *aloc; 5127 PetscBool match; 5128 5129 PetscFunctionBegin; 5130 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5131 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5132 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5133 if (!row) { 5134 start = A->rmap->rstart; end = A->rmap->rend; 5135 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5136 } else { 5137 isrowa = *row; 5138 } 5139 if (!col) { 5140 start = A->cmap->rstart; 5141 cmap = a->garray; 5142 nzA = a->A->cmap->n; 5143 nzB = a->B->cmap->n; 5144 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5145 ncols = 0; 5146 for (i=0; i<nzB; i++) { 5147 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5148 else break; 5149 } 5150 imark = i; 5151 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5152 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5153 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5154 } else { 5155 iscola = *col; 5156 } 5157 if (scall != MAT_INITIAL_MATRIX) { 5158 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5159 aloc[0] = *A_loc; 5160 } 5161 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5162 *A_loc = aloc[0]; 5163 ierr = PetscFree(aloc);CHKERRQ(ierr); 5164 if (!row) { 5165 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5166 } 5167 if (!col) { 5168 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5169 } 5170 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5171 PetscFunctionReturn(0); 5172 } 5173 5174 #undef __FUNCT__ 5175 #define __FUNCT__ "MatGetBrowsOfAcols" 5176 /*@C 5177 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5178 5179 Collective on Mat 5180 5181 Input Parameters: 5182 + A,B - the matrices in mpiaij format 5183 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5184 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5185 5186 Output Parameter: 5187 + rowb, colb - index sets of rows and columns of B to extract 5188 - B_seq - the sequential matrix generated 5189 5190 Level: developer 5191 5192 @*/ 5193 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5194 { 5195 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5196 PetscErrorCode ierr; 5197 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5198 IS isrowb,iscolb; 5199 Mat *bseq=NULL; 5200 5201 PetscFunctionBegin; 5202 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5203 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5204 } 5205 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5206 5207 if (scall == MAT_INITIAL_MATRIX) { 5208 start = A->cmap->rstart; 5209 cmap = a->garray; 5210 nzA = a->A->cmap->n; 5211 nzB = a->B->cmap->n; 5212 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5213 ncols = 0; 5214 for (i=0; i<nzB; i++) { /* row < local row index */ 5215 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5216 else break; 5217 } 5218 imark = i; 5219 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5220 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5221 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5222 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5223 } else { 5224 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5225 isrowb = *rowb; iscolb = *colb; 5226 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5227 bseq[0] = *B_seq; 5228 } 5229 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5230 *B_seq = bseq[0]; 5231 ierr = PetscFree(bseq);CHKERRQ(ierr); 5232 if (!rowb) { 5233 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5234 } else { 5235 *rowb = isrowb; 5236 } 5237 if (!colb) { 5238 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5239 } else { 5240 *colb = iscolb; 5241 } 5242 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5243 PetscFunctionReturn(0); 5244 } 5245 5246 #undef __FUNCT__ 5247 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5248 /* 5249 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5250 of the OFF-DIAGONAL portion of local A 5251 5252 Collective on Mat 5253 5254 Input Parameters: 5255 + A,B - the matrices in mpiaij format 5256 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5257 5258 Output Parameter: 5259 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5260 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5261 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5262 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5263 5264 Level: developer 5265 5266 */ 5267 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5268 { 5269 VecScatter_MPI_General *gen_to,*gen_from; 5270 PetscErrorCode ierr; 5271 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5272 Mat_SeqAIJ *b_oth; 5273 VecScatter ctx =a->Mvctx; 5274 MPI_Comm comm; 5275 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5276 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5277 PetscScalar *rvalues,*svalues; 5278 MatScalar *b_otha,*bufa,*bufA; 5279 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5280 MPI_Request *rwaits = NULL,*swaits = NULL; 5281 MPI_Status *sstatus,rstatus; 5282 PetscMPIInt jj; 5283 PetscInt *cols,sbs,rbs; 5284 PetscScalar *vals; 5285 5286 PetscFunctionBegin; 5287 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5288 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5289 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5290 } 5291 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5292 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5293 5294 gen_to = (VecScatter_MPI_General*)ctx->todata; 5295 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5296 rvalues = gen_from->values; /* holds the length of receiving row */ 5297 svalues = gen_to->values; /* holds the length of sending row */ 5298 nrecvs = gen_from->n; 5299 nsends = gen_to->n; 5300 5301 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5302 srow = gen_to->indices; /* local row index to be sent */ 5303 sstarts = gen_to->starts; 5304 sprocs = gen_to->procs; 5305 sstatus = gen_to->sstatus; 5306 sbs = gen_to->bs; 5307 rstarts = gen_from->starts; 5308 rprocs = gen_from->procs; 5309 rbs = gen_from->bs; 5310 5311 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5312 if (scall == MAT_INITIAL_MATRIX) { 5313 /* i-array */ 5314 /*---------*/ 5315 /* post receives */ 5316 for (i=0; i<nrecvs; i++) { 5317 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5318 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5319 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5320 } 5321 5322 /* pack the outgoing message */ 5323 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5324 5325 sstartsj[0] = 0; 5326 rstartsj[0] = 0; 5327 len = 0; /* total length of j or a array to be sent */ 5328 k = 0; 5329 for (i=0; i<nsends; i++) { 5330 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5331 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5332 for (j=0; j<nrows; j++) { 5333 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5334 for (l=0; l<sbs; l++) { 5335 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5336 5337 rowlen[j*sbs+l] = ncols; 5338 5339 len += ncols; 5340 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5341 } 5342 k++; 5343 } 5344 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5345 5346 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5347 } 5348 /* recvs and sends of i-array are completed */ 5349 i = nrecvs; 5350 while (i--) { 5351 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5352 } 5353 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5354 5355 /* allocate buffers for sending j and a arrays */ 5356 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5357 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5358 5359 /* create i-array of B_oth */ 5360 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5361 5362 b_othi[0] = 0; 5363 len = 0; /* total length of j or a array to be received */ 5364 k = 0; 5365 for (i=0; i<nrecvs; i++) { 5366 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5367 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5368 for (j=0; j<nrows; j++) { 5369 b_othi[k+1] = b_othi[k] + rowlen[j]; 5370 len += rowlen[j]; k++; 5371 } 5372 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5373 } 5374 5375 /* allocate space for j and a arrrays of B_oth */ 5376 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5377 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5378 5379 /* j-array */ 5380 /*---------*/ 5381 /* post receives of j-array */ 5382 for (i=0; i<nrecvs; i++) { 5383 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5384 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5385 } 5386 5387 /* pack the outgoing message j-array */ 5388 k = 0; 5389 for (i=0; i<nsends; i++) { 5390 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5391 bufJ = bufj+sstartsj[i]; 5392 for (j=0; j<nrows; j++) { 5393 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5394 for (ll=0; ll<sbs; ll++) { 5395 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5396 for (l=0; l<ncols; l++) { 5397 *bufJ++ = cols[l]; 5398 } 5399 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5400 } 5401 } 5402 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5403 } 5404 5405 /* recvs and sends of j-array are completed */ 5406 i = nrecvs; 5407 while (i--) { 5408 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5409 } 5410 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5411 } else if (scall == MAT_REUSE_MATRIX) { 5412 sstartsj = *startsj_s; 5413 rstartsj = *startsj_r; 5414 bufa = *bufa_ptr; 5415 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5416 b_otha = b_oth->a; 5417 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5418 5419 /* a-array */ 5420 /*---------*/ 5421 /* post receives of a-array */ 5422 for (i=0; i<nrecvs; i++) { 5423 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5424 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5425 } 5426 5427 /* pack the outgoing message a-array */ 5428 k = 0; 5429 for (i=0; i<nsends; i++) { 5430 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5431 bufA = bufa+sstartsj[i]; 5432 for (j=0; j<nrows; j++) { 5433 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5434 for (ll=0; ll<sbs; ll++) { 5435 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5436 for (l=0; l<ncols; l++) { 5437 *bufA++ = vals[l]; 5438 } 5439 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5440 } 5441 } 5442 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5443 } 5444 /* recvs and sends of a-array are completed */ 5445 i = nrecvs; 5446 while (i--) { 5447 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5448 } 5449 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5450 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5451 5452 if (scall == MAT_INITIAL_MATRIX) { 5453 /* put together the new matrix */ 5454 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5455 5456 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5457 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5458 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5459 b_oth->free_a = PETSC_TRUE; 5460 b_oth->free_ij = PETSC_TRUE; 5461 b_oth->nonew = 0; 5462 5463 ierr = PetscFree(bufj);CHKERRQ(ierr); 5464 if (!startsj_s || !bufa_ptr) { 5465 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5466 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5467 } else { 5468 *startsj_s = sstartsj; 5469 *startsj_r = rstartsj; 5470 *bufa_ptr = bufa; 5471 } 5472 } 5473 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5474 PetscFunctionReturn(0); 5475 } 5476 5477 #undef __FUNCT__ 5478 #define __FUNCT__ "MatGetCommunicationStructs" 5479 /*@C 5480 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5481 5482 Not Collective 5483 5484 Input Parameters: 5485 . A - The matrix in mpiaij format 5486 5487 Output Parameter: 5488 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5489 . 
colmap - A map from global column index to local index into lvec 5490 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5491 5492 Level: developer 5493 5494 @*/ 5495 #if defined(PETSC_USE_CTABLE) 5496 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5497 #else 5498 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5499 #endif 5500 { 5501 Mat_MPIAIJ *a; 5502 5503 PetscFunctionBegin; 5504 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5505 PetscValidPointer(lvec, 2); 5506 PetscValidPointer(colmap, 3); 5507 PetscValidPointer(multScatter, 4); 5508 a = (Mat_MPIAIJ*) A->data; 5509 if (lvec) *lvec = a->lvec; 5510 if (colmap) *colmap = a->colmap; 5511 if (multScatter) *multScatter = a->Mvctx; 5512 PetscFunctionReturn(0); 5513 } 5514 5515 EXTERN_C_BEGIN 5516 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5517 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5518 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5519 EXTERN_C_END 5520 5521 #undef __FUNCT__ 5522 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5523 /* 5524 Computes (B'*A')' since computing B*A directly is untenable 5525 5526 n p p 5527 ( ) ( ) ( ) 5528 m ( A ) * n ( B ) = m ( C ) 5529 ( ) ( ) ( ) 5530 5531 */ 5532 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5533 { 5534 PetscErrorCode ierr; 5535 Mat At,Bt,Ct; 5536 5537 PetscFunctionBegin; 5538 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5539 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5540 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5541 ierr = MatDestroy(&At);CHKERRQ(ierr); 5542 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5543 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5544 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5545 PetscFunctionReturn(0); 5546 } 5547 5548 #undef __FUNCT__ 5549 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5550 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5551 { 5552 PetscErrorCode ierr; 5553 PetscInt m=A->rmap->n,n=B->cmap->n; 5554 Mat Cmat; 5555 5556 PetscFunctionBegin; 5557 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5558 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5559 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5560 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5561 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5562 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5563 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5564 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5565 5566 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5567 5568 *C = Cmat; 5569 PetscFunctionReturn(0); 5570 } 5571 5572 /* ----------------------------------------------------------------*/ 5573 #undef __FUNCT__ 5574 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5575 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5576 { 5577 PetscErrorCode ierr; 5578 5579 PetscFunctionBegin; 5580 if (scall == MAT_INITIAL_MATRIX) { 5581 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5582 } 5583 ierr = 
MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5584 PetscFunctionReturn(0); 5585 } 5586 5587 EXTERN_C_BEGIN 5588 #if defined(PETSC_HAVE_MUMPS) 5589 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5590 #endif 5591 #if defined(PETSC_HAVE_PASTIX) 5592 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5593 #endif 5594 #if defined(PETSC_HAVE_SUPERLU_DIST) 5595 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5596 #endif 5597 #if defined(PETSC_HAVE_CLIQUE) 5598 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5599 #endif 5600 EXTERN_C_END 5601 5602 /*MC 5603 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5604 5605 Options Database Keys: 5606 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5607 5608 Level: beginner 5609 5610 .seealso: MatCreateAIJ() 5611 M*/ 5612 5613 EXTERN_C_BEGIN 5614 #undef __FUNCT__ 5615 #define __FUNCT__ "MatCreate_MPIAIJ" 5616 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5617 { 5618 Mat_MPIAIJ *b; 5619 PetscErrorCode ierr; 5620 PetscMPIInt size; 5621 5622 PetscFunctionBegin; 5623 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5624 5625 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5626 B->data = (void*)b; 5627 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5628 B->assembled = PETSC_FALSE; 5629 B->insertmode = NOT_SET_VALUES; 5630 b->size = size; 5631 5632 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5633 5634 /* build cache for off array entries formed */ 5635 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5636 5637 b->donotstash = PETSC_FALSE; 5638 b->colmap = 0; 5639 b->garray = 0; 5640 b->roworiented = PETSC_TRUE; 5641 5642 /* stuff used for matrix vector multiply */ 5643 b->lvec = NULL; 5644 b->Mvctx = NULL; 5645 5646 /* stuff for MatGetRow() */ 5647 b->rowindices = 0; 5648 b->rowvalues = 0; 5649 b->getrowactive = PETSC_FALSE; 5650 5651 /* flexible pointer used in CUSP/CUSPARSE classes */ 5652 b->spptr = NULL; 5653 5654 #if defined(PETSC_HAVE_MUMPS) 5655 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5656 "MatGetFactor_aij_mumps", 5657 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5658 #endif 5659 #if defined(PETSC_HAVE_PASTIX) 5660 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5661 "MatGetFactor_mpiaij_pastix", 5662 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5663 #endif 5664 #if defined(PETSC_HAVE_SUPERLU_DIST) 5665 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5666 "MatGetFactor_mpiaij_superlu_dist", 5667 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5668 #endif 5669 #if defined(PETSC_HAVE_CLIQUE) 5670 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5671 "MatGetFactor_aij_clique", 5672 MatGetFactor_aij_clique);CHKERRQ(ierr); 5673 #endif 5674 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5675 "MatStoreValues_MPIAIJ", 5676 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5677 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5678 "MatRetrieveValues_MPIAIJ", 5679 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5680 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5681 "MatGetDiagonalBlock_MPIAIJ", 5682 
MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5683 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5684 "MatIsTranspose_MPIAIJ", 5685 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5686 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5687 "MatMPIAIJSetPreallocation_MPIAIJ", 5688 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5689 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5690 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5691 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5692 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5693 "MatDiagonalScaleLocal_MPIAIJ", 5694 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5695 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5696 "MatConvert_MPIAIJ_MPIAIJPERM", 5697 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5698 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5699 "MatConvert_MPIAIJ_MPIAIJCRL", 5700 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5701 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5702 "MatConvert_MPIAIJ_MPISBAIJ", 5703 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5704 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5705 "MatMatMult_MPIDense_MPIAIJ", 5706 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5707 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5708 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5709 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5710 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5711 "MatMatMultNumeric_MPIDense_MPIAIJ", 5712 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5713 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5714 PetscFunctionReturn(0); 5715 } 5716 EXTERN_C_END 5717 5718 #undef __FUNCT__ 5719 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5720 /*@ 5721 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5722 and "off-diagonal" part of the matrix in CSR format. 5723 5724 Collective on MPI_Comm 5725 5726 Input Parameters: 5727 + comm - MPI communicator 5728 . m - number of local rows (Cannot be PETSC_DECIDE) 5729 . n - This value should be the same as the local size used in creating the 5730 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5731 calculated if N is given) For square matrices n is almost always m. 5732 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5733 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5734 . i - row indices for "diagonal" portion of matrix 5735 . j - column indices 5736 . a - matrix values 5737 . oi - row indices for "off-diagonal" portion of matrix 5738 . oj - column indices 5739 - oa - matrix values 5740 5741 Output Parameter: 5742 . mat - the matrix 5743 5744 Level: advanced 5745 5746 Notes: 5747 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5748 must free the arrays once the matrix has been destroyed and not before. 5749 5750 The i and j indices are 0 based 5751 5752 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5753 5754 This sets local rows and cannot be used to set off-processor values. 
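
    For illustration only, a hedged sketch with hypothetical values for the process
    owning rows 0-1 and columns 0-1 of an 8x8 matrix distributed over several processes;
    the j[] indices are local to the diagonal block while the oj[] indices are global,
    matching the creation of the off-diagonal block with the full global column count below:
.vb
   Mat            A;
   PetscErrorCode ierr;
   PetscInt       i[]  = {0,2,3},j[]  = {0,1,1}; /* diagonal block: 2 entries in row 0, 1 in row 1 */
   PetscScalar    a[]  = {1.0,2.0,3.0};
   PetscInt       oi[] = {0,1,2},oj[] = {4,5};   /* one off-diagonal entry per row, global columns */
   PetscScalar    oa[] = {4.0,5.0};

   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,8,8,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve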
    Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
    legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
    not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
    the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
    keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
    communication if it is known that only local entries will be set.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in void functions */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt

/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macros */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        /* search state for the "diagonal" block of this row */
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        /* search state for the "off-diagonal" block of this row */
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* the column is not present in the compacted off-diagonal block: expand B back to global width */
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed before computing ap2, since MatDisAssemble_MPIAIJ() replaced B's arrays */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        /* off-processor row: stash the values to be communicated during MatAssemblyBegin()/MatAssemblyEnd() */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END
