#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia = a->i;
  ib = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
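  A typical call, as a sketch (the variable names here are illustrative; the global
  SeqAIJ matrix gmat needs to be valid only on process 0, and every process passes
  its own local row count mlocal):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

  where the MAT_REUSE_MATRIX call keeps the already-distributed nonzero pattern and
  only moves new numerical values over from process 0.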
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 
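  A small example of the mapping this routine builds (a sketch, not tied to any
  particular run): if the off-diagonal block B references the global columns
  garray[] = {3, 7, 12}, then without PETSC_USE_CTABLE the array form is

     colmap[3] = 1;  colmap[7] = 2;  colmap[12] = 3;   (all other entries 0)

  so a later lookup such as

     col = aij->colmap[gcol] - 1;

  yields the local column of gcol in B, with a result of -1 (that is, a stored 0)
  meaning the global column does not appear in this process' off-diagonal part.
  The one-based shift is what lets 0 double as "not present".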
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 PetscFunctionBegin; 432 /* code 
only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew = a->nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 506 } else if (in[j] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 509 #endif 510 else { 511 if (mat->was_assembled) { 512 if (!aij->colmap) { 513 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 517 col--; 518 #else 519 col = aij->colmap[in[j]] - 1; 520 #endif 521 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 col = in[j]; 524 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 525 B = aij->B; 526 b = 
(Mat_SeqAIJ*)B->data; 527 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 528 rp2 = bj + bi[row]; 529 ap2 = ba + bi[row]; 530 rmax2 = bimax[row]; 531 nrow2 = bilen[row]; 532 low2 = 0; 533 high2 = nrow2; 534 bm = aij->B->rmap->n; 535 ba = b->a; 536 } 537 } else col = in[j]; 538 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 539 } 540 } 541 } else { 542 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 543 if (!aij->donotstash) { 544 if (roworiented) { 545 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 546 } else { 547 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatGetValues_MPIAIJ" 557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 560 PetscErrorCode ierr; 561 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 562 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 563 564 PetscFunctionBegin; 565 for (i=0; i<m; i++) { 566 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 567 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 568 if (idxm[i] >= rstart && idxm[i] < rend) { 569 row = idxm[i] - rstart; 570 for (j=0; j<n; j++) { 571 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 572 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 573 if (idxn[j] >= cstart && idxn[j] < cend){ 574 col = idxn[j] - cstart; 575 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 576 } else { 577 if (!aij->colmap) { 578 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 579 } 580 #if defined (PETSC_USE_CTABLE) 581 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 582 col --; 583 #else 584 col = aij->colmap[idxn[j]] - 1; 585 #endif 586 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 587 else { 588 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else { 593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (aij->donotstash || mat->nooffprocentries) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 
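  /*
     An illustration of the consistency check above (a sketch; the only property
     relied on is that INSERT_VALUES and ADD_VALUES are distinct nonzero flags):

       rank 0: INSERT_VALUES   rank 1: NOT_SET_VALUES  ->  BOR = INSERT_VALUES            (ok)
       rank 0: INSERT_VALUES   rank 1: ADD_VALUES      ->  BOR = ADD_VALUES|INSERT_VALUES (error above)

     A process that never called MatSetValues() contributes NOT_SET_VALUES (zero)
     and simply picks up the common mode in the assignment below.
  */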
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
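     For example, if the processes report was_assembled = {1, 1, 0}, the MPI_PROD
     reduction below returns 0 everywhere, so the two processes whose off-diagonal
     block B is still assembled call DisAssemble_MPIAIJ() and all processes can
     then rebuild B consistently.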
*/ 664 /* 665 if nonzero structure of submatrix B cannot change then we know that 666 no processor disassembled thus we can skip this stuff 667 */ 668 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 669 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 670 if (mat->was_assembled && !other_disassembled) { 671 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 672 } 673 } 674 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 675 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 681 682 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 683 aij->rowvalues = 0; 684 685 /* used by MatAXPY() */ 686 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 687 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 688 689 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 690 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else 
{ 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 
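     As an illustration of the right-hand side fix above: zeroing global row r
     with a nonzero diag while passing the vectors x and b turns that row of the
     system A*y = b into

        diag * y[r] = b[r],  with b[r] having just been reset to diag*x[r],

     so a subsequent solve reproduces the prescribed value x[r] in that row.
     When the diagonal block is square the diag entries are set by the fast
     MatZeroRows() call on l->A below; otherwise they must be inserted one at a
     time with MatSetValues().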
839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 876 { 877 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 880 PetscInt i,*owners = A->rmap->range; 881 PetscInt *nprocs,j,idx,nsends; 882 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 883 PetscInt *rvalues,count,base,slen,*source; 884 PetscInt *lens,*lrows,*values,m; 885 MPI_Comm comm = ((PetscObject)A)->comm; 886 MPI_Request *send_waits,*recv_waits; 887 MPI_Status recv_status,*send_status; 888 const PetscScalar *xx; 889 PetscScalar *bb,*mask; 890 Vec xmask,lmask; 891 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 892 const PetscInt *aj, *ii,*ridx; 893 PetscScalar *aa; 894 #if defined(PETSC_DEBUG) 895 PetscBool found = PETSC_FALSE; 896 #endif 897 898 PetscFunctionBegin; 899 /* first count number of contributors to each processor */ 900 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 901 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 902 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 903 j = 0; 904 for (i=0; i<N; i++) { 905 if (lastidx > (idx = rows[i])) j = 0; 906 lastidx = idx; 907 for (; j<size; j++) { 908 if (idx >= owners[j] && idx < owners[j+1]) { 909 nprocs[2*j]++; 910 nprocs[2*j+1] = 1; 911 owner[i] = j; 912 #if defined(PETSC_DEBUG) 913 found = PETSC_TRUE; 914 #endif 915 break; 916 } 917 } 918 #if defined(PETSC_DEBUG) 919 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 920 found = PETSC_FALSE; 921 #endif 922 } 923 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 924 925 /* inform other processors of number of messages and max length*/ 926 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 927 928 /* post receives: */ 929 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 930 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 931 for (i=0; i<nrecvs; i++) { 932 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 933 } 934 935 /* do sends: 936 1) starts[i] gives the starting index in svalues for stuff going to 937 the ith processor 938 */ 939 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 940 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 941 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 942 starts[0] = 0; 943 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 944 for (i=0; i<N; i++) { 945 svalues[starts[owner[i]]++] = rows[i]; 946 } 947 948 starts[0] = 0; 949 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 950 count = 0; 951 for (i=0; i<size; i++) { 952 if (nprocs[2*i+1]) { 953 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 954 } 955 } 956 ierr = PetscFree(starts);CHKERRQ(ierr); 957 958 base = owners[rank]; 959 960 /* wait on receives */ 961 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 962 count = nrecvs; slen = 0; 963 while (count) { 964 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 965 /* unpack receives into our local space */ 966 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 967 source[imdex] = recv_status.MPI_SOURCE; 968 lens[imdex] = n; 969 slen += n; 970 count--; 971 } 972 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 973 974 /* move the data into the send scatter */ 975 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 976 count = 0; 977 for (i=0; i<nrecvs; i++) { 978 values = rvalues + i*nmax; 979 for (j=0; j<lens[i]; j++) { 980 lrows[count++] = values[j] - base; 981 } 982 } 983 ierr = PetscFree(rvalues);CHKERRQ(ierr); 984 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 985 ierr = PetscFree(owner);CHKERRQ(ierr); 986 ierr = PetscFree(nprocs);CHKERRQ(ierr); 987 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 988 989 /* zero diagonal part of matrix */ 990 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 991 992 /* handle off diagonal part of matrix */ 993 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 994 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 995 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 996 for (i=0; i<slen; i++) { 997 bb[lrows[i]] = 1; 998 } 999 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1003 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1006 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1007 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1008 1009 /* remove zeroed rows of off diagonal matrix */ 1010 ii = aij->i; 1011 for (i=0; i<slen; i++) { 1012 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1013 } 1014 1015 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1016 if (aij->compressedrow.use){ 1017 m = aij->compressedrow.nrows; 1018 ii = aij->compressedrow.i; 1019 
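    /*
       Sketch of the compressed row layout used in this branch (inferred from how
       the fields are used below): only rows of B that contain nonzeros are stored,
       so for a 5-row local block in which just rows 1 and 4 have entries one has

         nrows  = 2
         rindex = {1, 4}          local row number of each stored row
         i      = {0, n1, n1+n4}  offsets into aij->j and aij->a, with n1 and n4
                                  the nonzero counts of rows 1 and 4

       The loop below therefore walks only the nonempty rows and uses ridx to
       update the matching entry of the right-hand side bb.
    */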
ridx = aij->compressedrow.rindex; 1020 for (i=0; i<m; i++){ 1021 n = ii[i+1] - ii[i]; 1022 aj = aij->j + ii[i]; 1023 aa = aij->a + ii[i]; 1024 1025 for (j=0; j<n; j++) { 1026 if (PetscAbsScalar(mask[*aj])) { 1027 bb[*ridx] -= *aa*xx[*aj]; 1028 *aa = 0.0; 1029 } 1030 aa++; 1031 aj++; 1032 } 1033 ridx++; 1034 } 1035 } else { /* do not use compressed row format */ 1036 m = l->B->rmap->n; 1037 for (i=0; i<m; i++) { 1038 n = ii[i+1] - ii[i]; 1039 aj = aij->j + ii[i]; 1040 aa = aij->a + ii[i]; 1041 for (j=0; j<n; j++) { 1042 if (PetscAbsScalar(mask[*aj])) { 1043 bb[i] -= *aa*xx[*aj]; 1044 *aa = 0.0; 1045 } 1046 aa++; 1047 aj++; 1048 } 1049 } 1050 } 1051 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1052 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1053 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1054 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1055 ierr = PetscFree(lrows);CHKERRQ(ierr); 1056 1057 /* wait on sends */ 1058 if (nsends) { 1059 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1060 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1061 ierr = PetscFree(send_status);CHKERRQ(ierr); 1062 } 1063 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1064 ierr = PetscFree(svalues);CHKERRQ(ierr); 1065 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatMult_MPIAIJ" 1071 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 PetscErrorCode ierr; 1075 PetscInt nt; 1076 1077 PetscFunctionBegin; 1078 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1079 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1080 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1082 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1083 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1089 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1090 { 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1092 PetscErrorCode ierr; 1093 1094 PetscFunctionBegin; 1095 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1101 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1102 { 1103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1104 PetscErrorCode ierr; 1105 1106 PetscFunctionBegin; 1107 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1108 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1109 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1116 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1117 { 1118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 PetscBool merged; 1121 1122 PetscFunctionBegin; 1123 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1124 /* do nondiagonal part */ 1125 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1126 if (!merged) { 1127 /* send it on its way */ 1128 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1129 /* do local part */ 1130 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1131 /* receive remote parts: note this assumes the values are not actually */ 1132 /* added in yy until the next line, */ 1133 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1134 } else { 1135 /* do local part */ 1136 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1137 /* send it on its way */ 1138 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1139 /* values actually were received in the Begin() but we need to call this nop */ 1140 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1141 } 1142 PetscFunctionReturn(0); 1143 } 1144 1145 EXTERN_C_BEGIN 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1148 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1149 { 1150 MPI_Comm comm; 1151 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1152 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1153 IS Me,Notme; 1154 PetscErrorCode ierr; 1155 PetscInt M,N,first,last,*notme,i; 1156 PetscMPIInt size; 1157 1158 PetscFunctionBegin; 1159 1160 /* Easy test: symmetric diagonal block */ 1161 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1162 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1163 if (!*f) PetscFunctionReturn(0); 1164 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1165 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1166 if (size == 1) PetscFunctionReturn(0); 1167 1168 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1169 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1170 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1171 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1172 for (i=0; i<first; i++) notme[i] = i; 1173 for (i=last; i<M; i++) notme[i-last+first] = i; 1174 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1175 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1176 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1177 Aoff = Aoffs[0]; 1178 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1179 Boff = Boffs[0]; 1180 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1181 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1182 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1183 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1184 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1185 ierr = PetscFree(notme);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 EXTERN_C_END 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1192 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1193 { 1194 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1195 PetscErrorCode ierr; 1196 1197 PetscFunctionBegin; 1198 /* do nondiagonal part */ 1199 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1200 /* send it on its way */ 1201 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1202 /* do local part */ 1203 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1204 /* receive remote parts */ 1205 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 /* 1210 This only works 
correctly for square matrices where the subblock A->A is the 1211 diagonal block 1212 */ 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1215 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1216 { 1217 PetscErrorCode ierr; 1218 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1219 1220 PetscFunctionBegin; 1221 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1222 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1223 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "MatScale_MPIAIJ" 1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1230 { 1231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1236 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatDestroy_MPIAIJ" 1242 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1243 { 1244 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 #if defined(PETSC_USE_LOG) 1249 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1250 #endif 1251 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1252 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1253 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1254 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1255 #if defined (PETSC_USE_CTABLE) 1256 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1257 #else 1258 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1259 #endif 1260 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1262 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1263 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1264 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1265 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1266 1267 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1281 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1282 { 1283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1284 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1285 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1286 PetscErrorCode ierr; 1287 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1288 int fd; 1289 PetscInt 
nz,header[4],*row_lengths,*range=0,rlen,i; 1290 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1291 PetscScalar *column_values; 1292 PetscInt message_count,flowcontrolcount; 1293 1294 PetscFunctionBegin; 1295 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1296 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1297 nz = A->nz + B->nz; 1298 if (!rank) { 1299 header[0] = MAT_FILE_CLASSID; 1300 header[1] = mat->rmap->N; 1301 header[2] = mat->cmap->N; 1302 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1303 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1304 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1305 /* get largest number of rows any processor has */ 1306 rlen = mat->rmap->n; 1307 range = mat->rmap->range; 1308 for (i=1; i<size; i++) { 1309 rlen = PetscMax(rlen,range[i+1] - range[i]); 1310 } 1311 } else { 1312 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1313 rlen = mat->rmap->n; 1314 } 1315 1316 /* load up the local row counts */ 1317 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1318 for (i=0; i<mat->rmap->n; i++) { 1319 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1320 } 1321 1322 /* store the row lengths to the file */ 1323 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1324 if (!rank) { 1325 MPI_Status status; 1326 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1327 for (i=1; i<size; i++) { 1328 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1329 rlen = range[i+1] - range[i]; 1330 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1331 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1332 } 1333 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1334 } else { 1335 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1336 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1337 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1338 } 1339 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1340 1341 /* load up the local column indices */ 1342 nzmax = nz; /* )th processor needs space a largest processor needs */ 1343 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1344 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1345 cnt = 0; 1346 for (i=0; i<mat->rmap->n; i++) { 1347 for (j=B->i[i]; j<B->i[i+1]; j++) { 1348 if ( (col = garray[B->j[j]]) > cstart) break; 1349 column_indices[cnt++] = col; 1350 } 1351 for (k=A->i[i]; k<A->i[i+1]; k++) { 1352 column_indices[cnt++] = A->j[k] + cstart; 1353 } 1354 for (; j<B->i[i+1]; j++) { 1355 column_indices[cnt++] = garray[B->j[j]]; 1356 } 1357 } 1358 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1359 1360 /* store the column indices to the file */ 1361 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1362 if (!rank) { 1363 MPI_Status status; 1364 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1365 for (i=1; i<size; i++) { 1366 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1367 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1368 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1369 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1370 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1371 } 1372 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1373 } else { 1374 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1375 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1376 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1377 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1378 } 1379 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1380 1381 /* load up the local column values */ 1382 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1383 cnt = 0; 1384 for (i=0; i<mat->rmap->n; i++) { 1385 for (j=B->i[i]; j<B->i[i+1]; j++) { 1386 if ( garray[B->j[j]] > cstart) break; 1387 column_values[cnt++] = B->a[j]; 1388 } 1389 for (k=A->i[i]; k<A->i[i+1]; k++) { 1390 column_values[cnt++] = A->a[k]; 1391 } 1392 for (; j<B->i[i+1]; j++) { 1393 column_values[cnt++] = B->a[j]; 1394 } 1395 } 1396 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1397 1398 /* store the column values to the file */ 1399 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1400 if (!rank) { 1401 MPI_Status status; 1402 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1403 for (i=1; i<size; i++) { 1404 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1405 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1406 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1407 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1408 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1409 } 1410 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1411 } else { 1412 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1413 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1414 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1415 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1416 } 1417 ierr = PetscFree(column_values);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1423 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1424 { 1425 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1426 PetscErrorCode ierr; 1427 PetscMPIInt rank = aij->rank,size = aij->size; 1428 PetscBool isdraw,iascii,isbinary; 1429 PetscViewer sviewer; 1430 PetscViewerFormat format; 1431 1432 PetscFunctionBegin; 1433 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1434 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1435 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1436 if (iascii) { 1437 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1438 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1439 MatInfo info; 1440 PetscBool inodes; 1441 1442 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1443 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1444 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1445 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1446 if (!inodes) { 1447 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1448 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1449 } else { 1450 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1451 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1452 } 1453 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1454 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1455 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1456 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1457 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1458 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1459 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1460 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1463 PetscInt inodecount,inodelimit,*inodes; 1464 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1465 if (inodes) { 1466 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1467 } else { 1468 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1469 } 1470 PetscFunctionReturn(0); 1471 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1472 PetscFunctionReturn(0); 1473 } 1474 } else if (isbinary) { 1475 if (size == 1) { 1476 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1477 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1478 } else { 1479 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1480 } 1481 PetscFunctionReturn(0); 1482 } else if (isdraw) { 1483 PetscDraw draw; 1484 PetscBool isnull; 1485 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1486 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1487 } 1488 1489 if (size == 1) { 1490 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1491 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1492 } else { 1493 /* assemble the entire matrix onto first processor. 
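       The gather below works by creating a temporary MATMPIAIJ matrix A whose rows
       all live on process 0 (with 2 processes and a 4 x 4 matrix, for instance,
       process 0 owns all 4 rows of the temporary matrix and process 1 owns none),
       then copying each process' diagonal and off-diagonal parts into it with
       MatSetValues() after translating the column indices back to global numbering.
       Viewing the sequential block of A on process 0 then prints or draws the whole
       matrix.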
*/ 1494 Mat A; 1495 Mat_SeqAIJ *Aloc; 1496 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1497 MatScalar *a; 1498 1499 if (mat->rmap->N > 1024) { 1500 PetscBool flg = PETSC_FALSE; 1501 1502 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1503 if (!flg) { 1504 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1505 } 1506 } 1507 1508 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1509 if (!rank) { 1510 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1511 } else { 1512 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1513 } 1514 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1515 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1516 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1517 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1518 1519 /* copy over the A part */ 1520 Aloc = (Mat_SeqAIJ*)aij->A->data; 1521 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1522 row = mat->rmap->rstart; 1523 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1524 for (i=0; i<m; i++) { 1525 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1526 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1527 } 1528 aj = Aloc->j; 1529 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1530 1531 /* copy over the B part */ 1532 Aloc = (Mat_SeqAIJ*)aij->B->data; 1533 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1534 row = mat->rmap->rstart; 1535 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1536 ct = cols; 1537 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1538 for (i=0; i<m; i++) { 1539 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1540 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1541 } 1542 ierr = PetscFree(ct);CHKERRQ(ierr); 1543 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1544 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1545 /* 1546 Everyone has to call to draw the matrix since the graphics waits are 1547 synchronized across all processors that share the PetscDraw object 1548 */ 1549 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1550 if (!rank) { 1551 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1552 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1553 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1554 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1555 } 1556 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1557 ierr = MatDestroy(&A);CHKERRQ(ierr); 1558 } 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatView_MPIAIJ" 1564 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1565 { 1566 PetscErrorCode ierr; 1567 PetscBool iascii,isdraw,issocket,isbinary; 1568 1569 PetscFunctionBegin; 1570 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1571 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1572 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1573 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1574 if (iascii || isdraw || isbinary || issocket) { 1575 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1576 } else { 1577 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1578 } 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatSOR_MPIAIJ" 1584 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1585 { 1586 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1587 PetscErrorCode ierr; 1588 Vec bb1 = 0; 1589 PetscBool hasop; 1590 1591 PetscFunctionBegin; 1592 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1593 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1594 } 1595 1596 if (flag == SOR_APPLY_UPPER) { 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1602 if (flag & SOR_ZERO_INITIAL_GUESS) { 1603 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1604 its--; 1605 } 1606 1607 while (its--) { 1608 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 1611 /* update rhs: bb1 = bb - B*x */ 1612 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1613 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1617 } 1618 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1619 if (flag & SOR_ZERO_INITIAL_GUESS) { 1620 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1621 its--; 1622 } 1623 while (its--) { 1624 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 1627 /* update rhs: bb1 = bb - B*x */ 1628 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1629 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1630 1631 /* local sweep */ 1632 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1633 } 1634 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 while (its--) { 1640 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 1643 /* update rhs: bb1 = bb - B*x */ 1644 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1645 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1646 1647 /* local sweep */ 1648 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1649 } 1650 } else if (flag & SOR_EISENSTAT) { 1651 Vec xx1; 1652 1653 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1654 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1655 1656 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 if (!mat->diag) { 1659 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1660 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1661 } 1662 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1663 if (hasop) { 1664 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1665 } else { 1666 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1667 } 1668 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1669 1670 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1671 1672 /* local sweep */ 1673 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1674 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1675 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1676 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1677 1678 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatPermute_MPIAIJ" 1684 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1685 { 1686 MPI_Comm comm,pcomm; 1687 PetscInt first,local_size,nrows; 1688 const PetscInt *rows; 1689 PetscMPIInt size; 1690 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1691 PetscErrorCode ierr; 1692 1693 PetscFunctionBegin; 1694 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1695 /* make a collective version of 'rowp' */ 1696 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1697 if (pcomm==comm) { 1698 crowp = rowp; 1699 } else { 1700 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1701 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1702 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1703 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1704 } 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 if (pcomm!=comm) { 1709 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1710 } 1711 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1712 /* get the local target indices */ 1713 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1714 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1715 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1716 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1717 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1718 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1719 /* the column permutation is so much easier; 1720 make a local version of 'colp' and invert it */ 1721 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1722 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1723 if (size==1) { 1724 lcolp = colp; 1725 } else { 1726 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1727 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1728 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1729 } 1730 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1731 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1732 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1733 if (size>1) { 1734 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1735 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1736 } 1737 /* now we just get the submatrix */ 1738 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1739 /* clean up */ 1740 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1741 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1748 { 1749 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1750 Mat A = mat->A,B = mat->B; 1751 PetscErrorCode ierr; 1752 PetscReal isend[5],irecv[5]; 1753 1754 PetscFunctionBegin; 1755 info->block_size = 1.0; 1756 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1757 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1758 isend[3] = info->memory; isend[4] = info->mallocs; 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1761 isend[3] += info->memory; isend[4] += info->mallocs; 1762 if (flag == MAT_LOCAL) { 1763 info->nz_used = isend[0]; 1764 info->nz_allocated = isend[1]; 1765 info->nz_unneeded = isend[2]; 1766 info->memory = isend[3]; 1767 info->mallocs = isend[4]; 1768 } else if (flag == MAT_GLOBAL_MAX) { 1769 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1770 info->nz_used = irecv[0]; 1771 info->nz_allocated = irecv[1]; 1772 info->nz_unneeded = irecv[2]; 1773 info->memory = irecv[3]; 1774 info->mallocs = irecv[4]; 1775 } else if (flag == MAT_GLOBAL_SUM) { 1776 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1777 info->nz_used = irecv[0]; 1778 info->nz_allocated = irecv[1]; 1779 info->nz_unneeded = irecv[2]; 1780 info->memory = irecv[3]; 1781 info->mallocs = irecv[4]; 1782 } 1783 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1784 info->fill_ratio_needed = 0; 1785 info->factor_mallocs = 0; 1786 1787 PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatSetOption_MPIAIJ" 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1807 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1808 break; 1809 case MAT_ROW_ORIENTED: 1810 a->roworiented = flg; 1811 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1812 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1813 break; 1814 case MAT_NEW_DIAGONALS: 1815 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1816 break; 1817 case MAT_IGNORE_OFF_PROC_ENTRIES: 1818 a->donotstash = flg; 1819 break; 1820 case MAT_SPD: 1821 A->spd_set = PETSC_TRUE; 1822 A->spd = flg; 1823 if (flg) { 1824 A->symmetric = PETSC_TRUE; 1825 A->structurally_symmetric = PETSC_TRUE; 1826 A->symmetric_set = PETSC_TRUE; 1827 A->structurally_symmetric_set = PETSC_TRUE; 1828 } 1829 break; 1830 case MAT_SYMMETRIC: 1831 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1832 break; 1833 case MAT_STRUCTURALLY_SYMMETRIC: 
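/* as with MAT_SYMMETRIC above, the structural-symmetry, Hermitian and symmetry-eternal hints only need to reach the sequential diagonal block */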
1834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1835 break; 1836 case MAT_HERMITIAN: 1837 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1838 break; 1839 case MAT_SYMMETRY_ETERNAL: 1840 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1841 break; 1842 default: 1843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatGetRow_MPIAIJ" 1850 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1851 { 1852 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1853 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1854 PetscErrorCode ierr; 1855 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1856 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1857 PetscInt *cmap,*idx_p; 1858 1859 PetscFunctionBegin; 1860 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1861 mat->getrowactive = PETSC_TRUE; 1862 1863 if (!mat->rowvalues && (idx || v)) { 1864 /* 1865 allocate enough space to hold information from the longest row. 1866 */ 1867 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1868 PetscInt max = 1,tmp; 1869 for (i=0; i<matin->rmap->n; i++) { 1870 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1871 if (max < tmp) { max = tmp; } 1872 } 1873 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1874 } 1875 1876 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1877 lrow = row - rstart; 1878 1879 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1880 if (!v) {pvA = 0; pvB = 0;} 1881 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1882 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1883 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1884 nztot = nzA + nzB; 1885 1886 cmap = mat->garray; 1887 if (v || idx) { 1888 if (nztot) { 1889 /* Sort by increasing column numbers, assuming A and B already sorted */ 1890 PetscInt imark = -1; 1891 if (v) { 1892 *v = v_p = mat->rowvalues; 1893 for (i=0; i<nzB; i++) { 1894 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1895 else break; 1896 } 1897 imark = i; 1898 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1899 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1900 } 1901 if (idx) { 1902 *idx = idx_p = mat->rowindices; 1903 if (imark > -1) { 1904 for (i=0; i<imark; i++) { 1905 idx_p[i] = cmap[cworkB[i]]; 1906 } 1907 } else { 1908 for (i=0; i<nzB; i++) { 1909 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1910 else break; 1911 } 1912 imark = i; 1913 } 1914 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1915 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1916 } 1917 } else { 1918 if (idx) *idx = 0; 1919 if (v) *v = 0; 1920 } 1921 } 1922 *nz = nztot; 1923 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1924 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1925 PetscFunctionReturn(0); 1926 } 1927 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1931 { 1932 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1933 1934 PetscFunctionBegin; 1935 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1936 
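/* only the flag is cleared here; the row work arrays allocated in MatGetRow_MPIAIJ() are kept for later calls and freed when the matrix is destroyed */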
aij->getrowactive = PETSC_FALSE; 1937 PetscFunctionReturn(0); 1938 } 1939 1940 #undef __FUNCT__ 1941 #define __FUNCT__ "MatNorm_MPIAIJ" 1942 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1943 { 1944 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1945 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1946 PetscErrorCode ierr; 1947 PetscInt i,j,cstart = mat->cmap->rstart; 1948 PetscReal sum = 0.0; 1949 MatScalar *v; 1950 1951 PetscFunctionBegin; 1952 if (aij->size == 1) { 1953 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1954 } else { 1955 if (type == NORM_FROBENIUS) { 1956 v = amat->a; 1957 for (i=0; i<amat->nz; i++) { 1958 #if defined(PETSC_USE_COMPLEX) 1959 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1960 #else 1961 sum += (*v)*(*v); v++; 1962 #endif 1963 } 1964 v = bmat->a; 1965 for (i=0; i<bmat->nz; i++) { 1966 #if defined(PETSC_USE_COMPLEX) 1967 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1968 #else 1969 sum += (*v)*(*v); v++; 1970 #endif 1971 } 1972 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1973 *norm = PetscSqrtReal(*norm); 1974 } else if (type == NORM_1) { /* max column norm */ 1975 PetscReal *tmp,*tmp2; 1976 PetscInt *jj,*garray = aij->garray; 1977 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1978 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1979 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1980 *norm = 0.0; 1981 v = amat->a; jj = amat->j; 1982 for (j=0; j<amat->nz; j++) { 1983 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a; jj = bmat->j; 1986 for (j=0; j<bmat->nz; j++) { 1987 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1988 } 1989 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1990 for (j=0; j<mat->cmap->N; j++) { 1991 if (tmp2[j] > *norm) *norm = tmp2[j]; 1992 } 1993 ierr = PetscFree(tmp);CHKERRQ(ierr); 1994 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1995 } else if (type == NORM_INFINITY) { /* max row norm */ 1996 PetscReal ntemp = 0.0; 1997 for (j=0; j<aij->A->rmap->n; j++) { 1998 v = amat->a + amat->i[j]; 1999 sum = 0.0; 2000 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2001 sum += PetscAbsScalar(*v); v++; 2002 } 2003 v = bmat->a + bmat->i[j]; 2004 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2005 sum += PetscAbsScalar(*v); v++; 2006 } 2007 if (sum > ntemp) ntemp = sum; 2008 } 2009 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2010 } else { 2011 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2012 } 2013 } 2014 PetscFunctionReturn(0); 2015 } 2016 2017 #undef __FUNCT__ 2018 #define __FUNCT__ "MatTranspose_MPIAIJ" 2019 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2020 { 2021 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2022 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2023 PetscErrorCode ierr; 2024 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2025 PetscInt cstart=A->cmap->rstart,ncol; 2026 Mat B; 2027 MatScalar *array; 2028 2029 PetscFunctionBegin; 2030 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2031 2032 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2033 ai = Aloc->i; aj = Aloc->j; 2034 bi = Bloc->i; bj = Bloc->j; 2035 if (reuse == 
MAT_INITIAL_MATRIX || *matout == A) { 2036 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2037 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2038 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2039 for (i=0; i<ai[ma]; i++){ 2040 d_nnz[aj[i]] ++; 2041 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2042 } 2043 2044 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2045 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2046 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2047 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2048 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2049 } else { 2050 B = *matout; 2051 } 2052 2053 /* copy over the A part */ 2054 array = Aloc->a; 2055 row = A->rmap->rstart; 2056 for (i=0; i<ma; i++) { 2057 ncol = ai[i+1]-ai[i]; 2058 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2059 row++; array += ncol; aj += ncol; 2060 } 2061 aj = Aloc->j; 2062 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2063 2064 /* copy over the B part */ 2065 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2066 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2067 array = Bloc->a; 2068 row = A->rmap->rstart; 2069 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2070 cols_tmp = cols; 2071 for (i=0; i<mb; i++) { 2072 ncol = bi[i+1]-bi[i]; 2073 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2074 row++; array += ncol; cols_tmp += ncol; 2075 } 2076 ierr = PetscFree(cols);CHKERRQ(ierr); 2077 2078 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2081 *matout = B; 2082 } else { 2083 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2084 } 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2090 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2091 { 2092 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2093 Mat a = aij->A,b = aij->B; 2094 PetscErrorCode ierr; 2095 PetscInt s1,s2,s3; 2096 2097 PetscFunctionBegin; 2098 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2099 if (rr) { 2100 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2101 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2102 /* Overlap communication with computation. 
*/ 2103 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2104 } 2105 if (ll) { 2106 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2107 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2108 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2109 } 2110 /* scale the diagonal block */ 2111 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2112 2113 if (rr) { 2114 /* Do a scatter end and then right scale the off-diagonal block */ 2115 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2117 } 2118 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2124 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2125 { 2126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2131 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2132 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2133 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2138 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2139 { 2140 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2145 PetscFunctionReturn(0); 2146 } 2147 2148 #undef __FUNCT__ 2149 #define __FUNCT__ "MatEqual_MPIAIJ" 2150 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2151 { 2152 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2153 Mat a,b,c,d; 2154 PetscBool flg; 2155 PetscErrorCode ierr; 2156 2157 PetscFunctionBegin; 2158 a = matA->A; b = matA->B; 2159 c = matB->A; d = matB->B; 2160 2161 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2162 if (flg) { 2163 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2164 } 2165 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatCopy_MPIAIJ" 2171 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2172 { 2173 PetscErrorCode ierr; 2174 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2175 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2176 2177 PetscFunctionBegin; 2178 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2179 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2180 /* because of the column compression in the off-processor part of the matrix a->B, 2181 the number of columns in a->B and b->B may be different, hence we cannot call 2182 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2183 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2184 then copying the submatrices */ 2185 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2186 } else { 2187 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2188 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2189 } 2190 PetscFunctionReturn(0); 2191 } 2192 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2195 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2196 { 2197 PetscErrorCode ierr; 2198 2199 PetscFunctionBegin; 2200 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2201 PetscFunctionReturn(0); 2202 } 2203 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "MatAXPY_MPIAIJ" 2206 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2207 { 2208 PetscErrorCode ierr; 2209 PetscInt i; 2210 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2211 PetscBLASInt bnz,one=1; 2212 Mat_SeqAIJ *x,*y; 2213 2214 PetscFunctionBegin; 2215 if (str == SAME_NONZERO_PATTERN) { 2216 PetscScalar alpha = a; 2217 x = (Mat_SeqAIJ *)xx->A->data; 2218 y = (Mat_SeqAIJ *)yy->A->data; 2219 bnz = PetscBLASIntCast(x->nz); 2220 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2221 x = (Mat_SeqAIJ *)xx->B->data; 2222 y = (Mat_SeqAIJ *)yy->B->data; 2223 bnz = PetscBLASIntCast(x->nz); 2224 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2225 } else if (str == SUBSET_NONZERO_PATTERN) { 2226 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2227 2228 x = (Mat_SeqAIJ *)xx->B->data; 2229 y = (Mat_SeqAIJ *)yy->B->data; 2230 if (y->xtoy && y->XtoY != xx->B) { 2231 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2232 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2233 } 2234 if (!y->xtoy) { /* get xtoy */ 2235 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2236 y->XtoY = xx->B; 2237 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2238 } 2239 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2240 } else { 2241 Mat B; 2242 PetscInt *nnz_d,*nnz_o; 2243 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2244 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2245 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2246 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2247 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2248 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2249 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2250 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2251 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2252 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2253 ierr = MatHeaderReplace(Y,B); 2254 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2255 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2256 } 2257 PetscFunctionReturn(0); 2258 } 2259 2260 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2261 2262 #undef __FUNCT__ 2263 #define __FUNCT__ "MatConjugate_MPIAIJ" 2264 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2265 { 2266 #if defined(PETSC_USE_COMPLEX) 2267 PetscErrorCode ierr; 2268 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2269 2270 PetscFunctionBegin; 2271 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2272 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2273 #else 2274 PetscFunctionBegin; 2275 #endif 2276 PetscFunctionReturn(0); 
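/* for real scalars conjugation is a no-op, so there is nothing to do */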
2277 }
2278
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatRealPart_MPIAIJ"
2281 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2282 {
2283 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2284 PetscErrorCode ierr;
2285
2286 PetscFunctionBegin;
2287 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2288 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2289 PetscFunctionReturn(0);
2290 }
2291
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2294 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2295 {
2296 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2297 PetscErrorCode ierr;
2298
2299 PetscFunctionBegin;
2300 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2301 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2302 PetscFunctionReturn(0);
2303 }
2304
2305 #ifdef PETSC_HAVE_PBGL
2306
2307 #include <boost/parallel/mpi/bsp_process_group.hpp>
2308 #include <boost/graph/distributed/ilu_default_graph.hpp>
2309 #include <boost/graph/distributed/ilu_0_block.hpp>
2310 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2311 #include <boost/graph/distributed/petsc/interface.hpp>
2312 #include <boost/multi_array.hpp>
2313 #include <boost/parallel/distributed_property_map.hpp>
2314
2315 #undef __FUNCT__
2316 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2317 /*
2318 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2319 */
2320 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2321 {
2322 namespace petsc = boost::distributed::petsc;
2323
2324 namespace graph_dist = boost::graph::distributed;
2325 using boost::graph::distributed::ilu_default::process_group_type;
2326 using boost::graph::ilu_permuted;
2327
2328 PetscBool row_identity, col_identity;
2329 PetscContainer c;
2330 PetscInt m, n, M, N;
2331 PetscErrorCode ierr;
2332
2333 PetscFunctionBegin;
2334 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2335 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2336 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2337 if (!row_identity || !col_identity) {
2338 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2339 }
2340
2341 process_group_type pg;
2342 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2343 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2344 lgraph_type& level_graph = *lgraph_p;
2345 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2346
2347 petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2348 ilu_permuted(level_graph);
2349
2350 /* put together the new matrix */
2351 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2352 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2353 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2354 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2355 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2356 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2357 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2358
2359 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2360 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2361 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2362 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2363 PetscFunctionReturn(0);
2364 }
2365
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2368 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat
B,Mat A, const MatFactorInfo *info) 2369 { 2370 PetscFunctionBegin; 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatSolve_MPIAIJ" 2376 /* 2377 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2378 */ 2379 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2380 { 2381 namespace graph_dist = boost::graph::distributed; 2382 2383 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2384 lgraph_type* lgraph_p; 2385 PetscContainer c; 2386 PetscErrorCode ierr; 2387 2388 PetscFunctionBegin; 2389 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2390 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2391 ierr = VecCopy(b, x);CHKERRQ(ierr); 2392 2393 PetscScalar* array_x; 2394 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2395 PetscInt sx; 2396 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2397 2398 PetscScalar* array_b; 2399 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2400 PetscInt sb; 2401 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2402 2403 lgraph_type& level_graph = *lgraph_p; 2404 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2405 2406 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2407 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2408 ref_x(array_x, boost::extents[num_vertices(graph)]); 2409 2410 typedef boost::iterator_property_map<array_ref_type::iterator, 2411 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2412 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2413 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2414 2415 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2416 2417 PetscFunctionReturn(0); 2418 } 2419 #endif 2420 2421 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2422 PetscInt nzlocal,nsends,nrecvs; 2423 PetscMPIInt *send_rank,*recv_rank; 2424 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2425 PetscScalar *sbuf_a,**rbuf_a; 2426 PetscErrorCode (*Destroy)(Mat); 2427 } Mat_Redundant; 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2431 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2432 { 2433 PetscErrorCode ierr; 2434 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2435 PetscInt i; 2436 2437 PetscFunctionBegin; 2438 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2439 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2440 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2441 for (i=0; i<redund->nrecvs; i++){ 2442 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2443 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2444 } 2445 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2446 ierr = PetscFree(redund);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 #undef __FUNCT__ 2451 #define __FUNCT__ "MatDestroy_MatRedundant" 2452 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2453 { 2454 PetscErrorCode ierr; 2455 PetscContainer container; 2456 Mat_Redundant *redund=PETSC_NULL; 2457 2458 PetscFunctionBegin; 2459 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2460 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2461 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2462 A->ops->destroy = redund->Destroy; 2463 ierr = 
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2464 if (A->ops->destroy) { 2465 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2472 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2473 { 2474 PetscMPIInt rank,size; 2475 MPI_Comm comm=((PetscObject)mat)->comm; 2476 PetscErrorCode ierr; 2477 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2478 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2479 PetscInt *rowrange=mat->rmap->range; 2480 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2481 Mat A=aij->A,B=aij->B,C=*matredundant; 2482 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2483 PetscScalar *sbuf_a; 2484 PetscInt nzlocal=a->nz+b->nz; 2485 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2486 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2487 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2488 MatScalar *aworkA,*aworkB; 2489 PetscScalar *vals; 2490 PetscMPIInt tag1,tag2,tag3,imdex; 2491 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2492 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2493 MPI_Status recv_status,*send_status; 2494 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2495 PetscInt **rbuf_j=PETSC_NULL; 2496 PetscScalar **rbuf_a=PETSC_NULL; 2497 Mat_Redundant *redund=PETSC_NULL; 2498 PetscContainer container; 2499 2500 PetscFunctionBegin; 2501 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2502 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2503 2504 if (reuse == MAT_REUSE_MATRIX) { 2505 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2506 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2507 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2508 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2509 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2510 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2511 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2512 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2513 2514 nsends = redund->nsends; 2515 nrecvs = redund->nrecvs; 2516 send_rank = redund->send_rank; 2517 recv_rank = redund->recv_rank; 2518 sbuf_nz = redund->sbuf_nz; 2519 rbuf_nz = redund->rbuf_nz; 2520 sbuf_j = redund->sbuf_j; 2521 sbuf_a = redund->sbuf_a; 2522 rbuf_j = redund->rbuf_j; 2523 rbuf_a = redund->rbuf_a; 2524 } 2525 2526 if (reuse == MAT_INITIAL_MATRIX){ 2527 PetscMPIInt subrank,subsize; 2528 PetscInt nleftover,np_subcomm; 2529 /* get the destination processors' id send_rank, nsends and nrecvs */ 2530 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2531 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2532 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2533 np_subcomm = size/nsubcomm; 2534 nleftover = size - nsubcomm*np_subcomm; 2535 nsends = 0; nrecvs = 0; 2536 for (i=0; i<size; i++){ /* i=rank*/ 2537 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2538 send_rank[nsends] = i; nsends++; 2539 recv_rank[nrecvs++] = i; 2540 } 2541 } 2542 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2543 i = size-nleftover-1; 2544 j = 0; 2545 while (j < nsubcomm - nleftover){ 2546 send_rank[nsends++] = i; 2547 i--; j++; 2548 } 2549 } 2550 2551 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2552 for (i=0; i<nleftover; i++){ 2553 recv_rank[nrecvs++] = size-nleftover+i; 2554 } 2555 } 2556 2557 /* allocate sbuf_j, sbuf_a */ 2558 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2559 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2560 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2561 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2562 2563 /* copy mat's local entries into the buffers */ 2564 if (reuse == MAT_INITIAL_MATRIX){ 2565 rownz_max = 0; 2566 rptr = sbuf_j; 2567 cols = sbuf_j + rend-rstart + 1; 2568 vals = sbuf_a; 2569 rptr[0] = 0; 2570 for (i=0; i<rend-rstart; i++){ 2571 row = i + rstart; 2572 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2573 ncols = nzA + nzB; 2574 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2575 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2576 /* load the column indices for this row into cols */ 2577 lwrite = 0; 2578 for (l=0; l<nzB; l++) { 2579 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2580 vals[lwrite] = aworkB[l]; 2581 cols[lwrite++] = ctmp; 2582 } 2583 } 2584 for (l=0; l<nzA; l++){ 2585 vals[lwrite] = aworkA[l]; 2586 cols[lwrite++] = cstart + cworkA[l]; 2587 } 2588 for (l=0; l<nzB; l++) { 2589 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2590 vals[lwrite] = aworkB[l]; 2591 cols[lwrite++] = ctmp; 2592 } 2593 } 2594 vals += ncols; 2595 cols += ncols; 2596 rptr[i+1] = rptr[i] + ncols; 2597 if (rownz_max < ncols) rownz_max = ncols; 2598 } 2599 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2600 } else { /* only copy matrix values into sbuf_a */ 2601 rptr = sbuf_j; 2602 vals = sbuf_a; 2603 rptr[0] = 0; 2604 for (i=0; i<rend-rstart; i++){ 2605 row = i + rstart; 2606 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2607 ncols = nzA + nzB; 2608 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2609 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2610 lwrite = 0; 2611 for (l=0; l<nzB; l++) { 2612 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2613 } 2614 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2615 for 
(l=0; l<nzB; l++) { 2616 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2617 } 2618 vals += ncols; 2619 rptr[i+1] = rptr[i] + ncols; 2620 } 2621 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2622 2623 /* send nzlocal to others, and recv other's nzlocal */ 2624 /*--------------------------------------------------*/ 2625 if (reuse == MAT_INITIAL_MATRIX){ 2626 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2627 s_waits2 = s_waits3 + nsends; 2628 s_waits1 = s_waits2 + nsends; 2629 r_waits1 = s_waits1 + nsends; 2630 r_waits2 = r_waits1 + nrecvs; 2631 r_waits3 = r_waits2 + nrecvs; 2632 } else { 2633 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2634 r_waits3 = s_waits3 + nsends; 2635 } 2636 2637 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2638 if (reuse == MAT_INITIAL_MATRIX){ 2639 /* get new tags to keep the communication clean */ 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2641 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2642 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2643 2644 /* post receives of other's nzlocal */ 2645 for (i=0; i<nrecvs; i++){ 2646 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2647 } 2648 /* send nzlocal to others */ 2649 for (i=0; i<nsends; i++){ 2650 sbuf_nz[i] = nzlocal; 2651 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2652 } 2653 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2654 count = nrecvs; 2655 while (count) { 2656 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2657 recv_rank[imdex] = recv_status.MPI_SOURCE; 2658 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2659 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2660 2661 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2662 rbuf_nz[imdex] += i + 2; 2663 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2664 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2665 count--; 2666 } 2667 /* wait on sends of nzlocal */ 2668 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2669 /* send mat->i,j to others, and recv from other's */ 2670 /*------------------------------------------------*/ 2671 for (i=0; i<nsends; i++){ 2672 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2673 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2674 } 2675 /* wait on receives of mat->i,j */ 2676 /*------------------------------*/ 2677 count = nrecvs; 2678 while (count) { 2679 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2680 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2681 count--; 2682 } 2683 /* wait on sends of mat->i,j */ 2684 /*---------------------------*/ 2685 if (nsends) { 2686 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2687 } 2688 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2689 2690 /* post receives, send and receive mat->a */ 2691 
/*----------------------------------------*/ 2692 for (imdex=0; imdex<nrecvs; imdex++) { 2693 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2694 } 2695 for (i=0; i<nsends; i++){ 2696 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2697 } 2698 count = nrecvs; 2699 while (count) { 2700 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2701 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2702 count--; 2703 } 2704 if (nsends) { 2705 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2706 } 2707 2708 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2709 2710 /* create redundant matrix */ 2711 /*-------------------------*/ 2712 if (reuse == MAT_INITIAL_MATRIX){ 2713 /* compute rownz_max for preallocation */ 2714 for (imdex=0; imdex<nrecvs; imdex++){ 2715 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2716 rptr = rbuf_j[imdex]; 2717 for (i=0; i<j; i++){ 2718 ncols = rptr[i+1] - rptr[i]; 2719 if (rownz_max < ncols) rownz_max = ncols; 2720 } 2721 } 2722 2723 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2724 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2725 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2726 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2727 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2728 } else { 2729 C = *matredundant; 2730 } 2731 2732 /* insert local matrix entries */ 2733 rptr = sbuf_j; 2734 cols = sbuf_j + rend-rstart + 1; 2735 vals = sbuf_a; 2736 for (i=0; i<rend-rstart; i++){ 2737 row = i + rstart; 2738 ncols = rptr[i+1] - rptr[i]; 2739 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2740 vals += ncols; 2741 cols += ncols; 2742 } 2743 /* insert received matrix entries */ 2744 for (imdex=0; imdex<nrecvs; imdex++){ 2745 rstart = rowrange[recv_rank[imdex]]; 2746 rend = rowrange[recv_rank[imdex]+1]; 2747 rptr = rbuf_j[imdex]; 2748 cols = rbuf_j[imdex] + rend-rstart + 1; 2749 vals = rbuf_a[imdex]; 2750 for (i=0; i<rend-rstart; i++){ 2751 row = i + rstart; 2752 ncols = rptr[i+1] - rptr[i]; 2753 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2754 vals += ncols; 2755 cols += ncols; 2756 } 2757 } 2758 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2760 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2761 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2762 if (reuse == MAT_INITIAL_MATRIX) { 2763 PetscContainer container; 2764 *matredundant = C; 2765 /* create a supporting struct and attach it to C for reuse */ 2766 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2767 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2768 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2769 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2770 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2771 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2772 2773 redund->nzlocal = nzlocal; 2774 redund->nsends = nsends; 2775 redund->nrecvs = nrecvs; 2776 
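/* stash the communication pattern and buffers so a later call with MAT_REUSE_MATRIX can skip the setup above */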
redund->send_rank = send_rank;
2777 redund->recv_rank = recv_rank;
2778 redund->sbuf_nz = sbuf_nz;
2779 redund->rbuf_nz = rbuf_nz;
2780 redund->sbuf_j = sbuf_j;
2781 redund->sbuf_a = sbuf_a;
2782 redund->rbuf_j = rbuf_j;
2783 redund->rbuf_a = rbuf_a;
2784
2785 redund->Destroy = C->ops->destroy;
2786 C->ops->destroy = MatDestroy_MatRedundant;
2787 }
2788 PetscFunctionReturn(0);
2789 }
2790
2791 #undef __FUNCT__
2792 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2793 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2794 {
2795 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2796 PetscErrorCode ierr;
2797 PetscInt i,*idxb = 0;
2798 PetscScalar *va,*vb;
2799 Vec vtmp;
2800
2801 PetscFunctionBegin;
2802 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2803 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2804 if (idx) {
2805 for (i=0; i<A->rmap->n; i++) {
2806 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2807 }
2808 }
2809
2810 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2811 if (idx) {
2812 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2813 }
2814 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2815 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2816
2817 for (i=0; i<A->rmap->n; i++){
2818 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2819 va[i] = vb[i];
2820 if (idx) idx[i] = a->garray[idxb[i]];
2821 }
2822 }
2823
2824 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2825 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2826 ierr = PetscFree(idxb);CHKERRQ(ierr);
2827 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2828 PetscFunctionReturn(0);
2829 }
2830
2831 #undef __FUNCT__
2832 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2833 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2834 {
2835 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2836 PetscErrorCode ierr;
2837 PetscInt i,*idxb = 0;
2838 PetscScalar *va,*vb;
2839 Vec vtmp;
2840
2841 PetscFunctionBegin;
2842 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2843 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2844 if (idx) {
2845 for (i=0; i<A->rmap->n; i++) {
2846 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2847 }
2848 }
2849
2850 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2851 if (idx) {
2852 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2853 }
2854 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2855 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2856
2857 for (i=0; i<A->rmap->n; i++){
2858 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2859 va[i] = vb[i];
2860 if (idx) idx[i] = a->garray[idxb[i]];
2861 }
2862 }
2863
2864 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2865 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2866 ierr = PetscFree(idxb);CHKERRQ(ierr);
2867 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2868 PetscFunctionReturn(0);
2869 }
2870
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2873 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2874 {
2875 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2876 PetscInt n = A->rmap->n;
2877 PetscInt cstart = A->cmap->rstart;
2878 PetscInt *cmap = mat->garray;
2879 PetscInt *diagIdx, *offdiagIdx;
2880 Vec diagV, offdiagV;
2881 PetscScalar *a, *diagA, *offdiagA;
2882 PetscInt r;
2883 PetscErrorCode ierr;
2884
2885 PetscFunctionBegin;
2886 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2887 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2888 ierr = VecCreateSeq(PETSC_COMM_SELF, n,
&offdiagV);CHKERRQ(ierr);
2889 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2890 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2891 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2892 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2893 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2894 for(r = 0; r < n; ++r) {
2895 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2896 a[r] = diagA[r];
2897 idx[r] = cstart + diagIdx[r];
2898 } else {
2899 a[r] = offdiagA[r];
2900 idx[r] = cmap[offdiagIdx[r]];
2901 }
2902 }
2903 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2904 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2905 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2906 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2907 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2908 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2909 PetscFunctionReturn(0);
2910 }
2911
2912 #undef __FUNCT__
2913 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2914 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2915 {
2916 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2917 PetscInt n = A->rmap->n;
2918 PetscInt cstart = A->cmap->rstart;
2919 PetscInt *cmap = mat->garray;
2920 PetscInt *diagIdx, *offdiagIdx;
2921 Vec diagV, offdiagV;
2922 PetscScalar *a, *diagA, *offdiagA;
2923 PetscInt r;
2924 PetscErrorCode ierr;
2925
2926 PetscFunctionBegin;
2927 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2928 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2929 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2930 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2931 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2932 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2933 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2934 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2935 for(r = 0; r < n; ++r) {
2936 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2937 a[r] = diagA[r];
2938 idx[r] = cstart + diagIdx[r];
2939 } else {
2940 a[r] = offdiagA[r];
2941 idx[r] = cmap[offdiagIdx[r]];
2942 }
2943 }
2944 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2945 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2946 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2947 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2948 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2949 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2950 PetscFunctionReturn(0);
2951 }
2952
2953 #undef __FUNCT__
2954 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2955 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2956 {
2957 PetscErrorCode ierr;
2958 Mat *dummy;
2959
2960 PetscFunctionBegin;
2961 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2962 *newmat = *dummy;
2963 ierr = PetscFree(dummy);CHKERRQ(ierr);
2964 PetscFunctionReturn(0);
2965 }
2966
2967 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2968
2969 #undef __FUNCT__
2970 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2971 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2972 {
2973 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2974 PetscErrorCode ierr;
2975
2976 PetscFunctionBegin;
2977 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2978 PetscFunctionReturn(0);
2979 }
2980
2981
2982 /* -------------------------------------------------------------------*/
2983 static
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2984 MatGetRow_MPIAIJ, 2985 MatRestoreRow_MPIAIJ, 2986 MatMult_MPIAIJ, 2987 /* 4*/ MatMultAdd_MPIAIJ, 2988 MatMultTranspose_MPIAIJ, 2989 MatMultTransposeAdd_MPIAIJ, 2990 #ifdef PETSC_HAVE_PBGL 2991 MatSolve_MPIAIJ, 2992 #else 2993 0, 2994 #endif 2995 0, 2996 0, 2997 /*10*/ 0, 2998 0, 2999 0, 3000 MatSOR_MPIAIJ, 3001 MatTranspose_MPIAIJ, 3002 /*15*/ MatGetInfo_MPIAIJ, 3003 MatEqual_MPIAIJ, 3004 MatGetDiagonal_MPIAIJ, 3005 MatDiagonalScale_MPIAIJ, 3006 MatNorm_MPIAIJ, 3007 /*20*/ MatAssemblyBegin_MPIAIJ, 3008 MatAssemblyEnd_MPIAIJ, 3009 MatSetOption_MPIAIJ, 3010 MatZeroEntries_MPIAIJ, 3011 /*24*/ MatZeroRows_MPIAIJ, 3012 0, 3013 #ifdef PETSC_HAVE_PBGL 3014 0, 3015 #else 3016 0, 3017 #endif 3018 0, 3019 0, 3020 /*29*/ MatSetUpPreallocation_MPIAIJ, 3021 #ifdef PETSC_HAVE_PBGL 3022 0, 3023 #else 3024 0, 3025 #endif 3026 0, 3027 0, 3028 0, 3029 /*34*/ MatDuplicate_MPIAIJ, 3030 0, 3031 0, 3032 0, 3033 0, 3034 /*39*/ MatAXPY_MPIAIJ, 3035 MatGetSubMatrices_MPIAIJ, 3036 MatIncreaseOverlap_MPIAIJ, 3037 MatGetValues_MPIAIJ, 3038 MatCopy_MPIAIJ, 3039 /*44*/ MatGetRowMax_MPIAIJ, 3040 MatScale_MPIAIJ, 3041 0, 3042 0, 3043 MatZeroRowsColumns_MPIAIJ, 3044 /*49*/ MatSetBlockSize_MPIAIJ, 3045 0, 3046 0, 3047 0, 3048 0, 3049 /*54*/ MatFDColoringCreate_MPIAIJ, 3050 0, 3051 MatSetUnfactored_MPIAIJ, 3052 MatPermute_MPIAIJ, 3053 0, 3054 /*59*/ MatGetSubMatrix_MPIAIJ, 3055 MatDestroy_MPIAIJ, 3056 MatView_MPIAIJ, 3057 0, 3058 0, 3059 /*64*/ 0, 3060 0, 3061 0, 3062 0, 3063 0, 3064 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3065 MatGetRowMinAbs_MPIAIJ, 3066 0, 3067 MatSetColoring_MPIAIJ, 3068 #if defined(PETSC_HAVE_ADIC) 3069 MatSetValuesAdic_MPIAIJ, 3070 #else 3071 0, 3072 #endif 3073 MatSetValuesAdifor_MPIAIJ, 3074 /*75*/ MatFDColoringApply_AIJ, 3075 0, 3076 0, 3077 0, 3078 0, 3079 /*80*/ 0, 3080 0, 3081 0, 3082 /*83*/ MatLoad_MPIAIJ, 3083 0, 3084 0, 3085 0, 3086 0, 3087 0, 3088 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3089 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3090 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3091 MatPtAP_Basic, 3092 MatPtAPSymbolic_MPIAIJ, 3093 /*94*/ MatPtAPNumeric_MPIAIJ, 3094 0, 3095 0, 3096 0, 3097 0, 3098 /*99*/ 0, 3099 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3100 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3101 MatConjugate_MPIAIJ, 3102 0, 3103 /*104*/MatSetValuesRow_MPIAIJ, 3104 MatRealPart_MPIAIJ, 3105 MatImaginaryPart_MPIAIJ, 3106 0, 3107 0, 3108 /*109*/0, 3109 MatGetRedundantMatrix_MPIAIJ, 3110 MatGetRowMin_MPIAIJ, 3111 0, 3112 0, 3113 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 3114 0, 3115 0, 3116 0, 3117 0, 3118 /*119*/0, 3119 0, 3120 0, 3121 0, 3122 MatGetMultiProcBlock_MPIAIJ, 3123 /*124*/MatFindNonZeroRows_MPIAIJ, 3124 MatGetColumnNorms_MPIAIJ, 3125 MatInvertBlockDiagonal_MPIAIJ, 3126 0, 3127 MatGetSubMatricesParallel_MPIAIJ, 3128 /*129*/0 3129 }; 3130 3131 /* ----------------------------------------------------------------------------------------*/ 3132 3133 EXTERN_C_BEGIN 3134 #undef __FUNCT__ 3135 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3136 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3137 { 3138 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3139 PetscErrorCode ierr; 3140 3141 PetscFunctionBegin; 3142 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3143 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3144 PetscFunctionReturn(0); 3145 } 3146 EXTERN_C_END 3147 3148 EXTERN_C_BEGIN 3149 #undef __FUNCT__ 3150 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3151 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3152 { 3153 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3154 PetscErrorCode ierr; 3155 3156 
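  /* MatStoreValues_MPIAIJ()/MatRetrieveValues_MPIAIJ() simply forward to the diagonal (A) and
     off-diagonal (B) sequential blocks.  A sketch of the typical user-level calling sequence
     (assuming, as MatStoreValues() requires, that no new nonzero locations will be generated):

        MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
        MatStoreValues(mat);         stash the current numerical values
           ... overwrite values, factor, solve ...
        MatRetrieveValues(mat);      restore the stashed values
  */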
PetscFunctionBegin; 3157 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3158 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3159 PetscFunctionReturn(0); 3160 } 3161 EXTERN_C_END 3162 3163 EXTERN_C_BEGIN 3164 #undef __FUNCT__ 3165 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3166 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3167 { 3168 Mat_MPIAIJ *b; 3169 PetscErrorCode ierr; 3170 PetscInt i; 3171 3172 PetscFunctionBegin; 3173 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3174 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3175 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3176 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3177 3178 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3179 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3180 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3181 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3182 if (d_nnz) { 3183 for (i=0; i<B->rmap->n; i++) { 3184 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3185 } 3186 } 3187 if (o_nnz) { 3188 for (i=0; i<B->rmap->n; i++) { 3189 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3190 } 3191 } 3192 b = (Mat_MPIAIJ*)B->data; 3193 3194 if (!B->preallocated) { 3195 /* Explicitly create 2 MATSEQAIJ matrices. */ 3196 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3197 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3198 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3199 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3200 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3201 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3202 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3203 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3204 } 3205 3206 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3207 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3208 B->preallocated = PETSC_TRUE; 3209 PetscFunctionReturn(0); 3210 } 3211 EXTERN_C_END 3212 3213 #undef __FUNCT__ 3214 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3215 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3216 { 3217 Mat mat; 3218 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3219 PetscErrorCode ierr; 3220 3221 PetscFunctionBegin; 3222 *newmat = 0; 3223 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3224 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3225 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3226 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3227 a = (Mat_MPIAIJ*)mat->data; 3228 3229 mat->factortype = matin->factortype; 3230 mat->rmap->bs = matin->rmap->bs; 3231 mat->assembled = PETSC_TRUE; 3232 mat->insertmode = NOT_SET_VALUES; 3233 mat->preallocated = PETSC_TRUE; 3234 3235 a->size = oldmat->size; 3236 a->rank = oldmat->rank; 3237 a->donotstash = oldmat->donotstash; 3238 a->roworiented = oldmat->roworiented; 3239 a->rowindices = 0; 3240 a->rowvalues = 0; 3241 a->getrowactive = PETSC_FALSE; 3242 3243 ierr = 
PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3244 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3245 3246 if (oldmat->colmap) { 3247 #if defined (PETSC_USE_CTABLE) 3248 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3249 #else 3250 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3251 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3252 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3253 #endif 3254 } else a->colmap = 0; 3255 if (oldmat->garray) { 3256 PetscInt len; 3257 len = oldmat->B->cmap->n; 3258 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3259 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3260 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3261 } else a->garray = 0; 3262 3263 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3264 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3265 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3266 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3267 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3268 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3269 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3270 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3271 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3272 *newmat = mat; 3273 PetscFunctionReturn(0); 3274 } 3275 3276 3277 3278 #undef __FUNCT__ 3279 #define __FUNCT__ "MatLoad_MPIAIJ" 3280 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3281 { 3282 PetscScalar *vals,*svals; 3283 MPI_Comm comm = ((PetscObject)viewer)->comm; 3284 PetscErrorCode ierr; 3285 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3286 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3287 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3288 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3289 PetscInt cend,cstart,n,*rowners,sizesset=1; 3290 int fd; 3291 3292 PetscFunctionBegin; 3293 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3294 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3295 if (!rank) { 3296 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3297 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3298 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3299 } 3300 3301 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3302 3303 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3304 M = header[1]; N = header[2]; 3305 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3306 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3307 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3308 3309 /* If global sizes are set, check if they are consistent with that given in the file */ 3310 if (sizesset) { 3311 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3312 } 3313 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3314 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of 
cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3315 3316 /* determine ownership of all rows */ 3317 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3318 else m = newMat->rmap->n; /* Set by user */ 3319 3320 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3321 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3322 3323 /* First process needs enough room for process with most rows */ 3324 if (!rank) { 3325 mmax = rowners[1]; 3326 for (i=2; i<size; i++) { 3327 mmax = PetscMax(mmax,rowners[i]); 3328 } 3329 } else mmax = m; 3330 3331 rowners[0] = 0; 3332 for (i=2; i<=size; i++) { 3333 rowners[i] += rowners[i-1]; 3334 } 3335 rstart = rowners[rank]; 3336 rend = rowners[rank+1]; 3337 3338 /* distribute row lengths to all processors */ 3339 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3340 if (!rank) { 3341 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3342 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3343 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3344 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3345 for (j=0; j<m; j++) { 3346 procsnz[0] += ourlens[j]; 3347 } 3348 for (i=1; i<size; i++) { 3349 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3350 /* calculate the number of nonzeros on each processor */ 3351 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3352 procsnz[i] += rowlengths[j]; 3353 } 3354 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3355 } 3356 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3357 } else { 3358 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3359 } 3360 3361 if (!rank) { 3362 /* determine max buffer needed and allocate it */ 3363 maxnz = 0; 3364 for (i=0; i<size; i++) { 3365 maxnz = PetscMax(maxnz,procsnz[i]); 3366 } 3367 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3368 3369 /* read in my part of the matrix column indices */ 3370 nz = procsnz[0]; 3371 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3372 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3373 3374 /* read in every one elses and ship off */ 3375 for (i=1; i<size; i++) { 3376 nz = procsnz[i]; 3377 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3378 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3379 } 3380 ierr = PetscFree(cols);CHKERRQ(ierr); 3381 } else { 3382 /* determine buffer space needed for message */ 3383 nz = 0; 3384 for (i=0; i<m; i++) { 3385 nz += ourlens[i]; 3386 } 3387 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3388 3389 /* receive message of column indices*/ 3390 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3391 } 3392 3393 /* determine column ownership if matrix is not square */ 3394 if (N != M) { 3395 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3396 else n = newMat->cmap->n; 3397 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3398 cstart = cend - n; 3399 } else { 3400 cstart = rstart; 3401 cend = rend; 3402 n = cend - cstart; 3403 } 3404 3405 /* loop over local rows, determining number of off diagonal entries */ 3406 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3407 jj = 0; 3408 for (i=0; i<m; i++) { 3409 for (j=0; j<ourlens[i]; j++) { 3410 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3411 jj++; 3412 } 
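      /* at this point offlens[i] counts the entries of local row i whose global column index
         lies outside [cstart,cend), i.e. the entries that belong in the off-diagonal block */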
3413 } 3414 3415 for (i=0; i<m; i++) { 3416 ourlens[i] -= offlens[i]; 3417 } 3418 if (!sizesset) { 3419 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3420 } 3421 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3422 3423 for (i=0; i<m; i++) { 3424 ourlens[i] += offlens[i]; 3425 } 3426 3427 if (!rank) { 3428 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3429 3430 /* read in my part of the matrix numerical values */ 3431 nz = procsnz[0]; 3432 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3433 3434 /* insert into matrix */ 3435 jj = rstart; 3436 smycols = mycols; 3437 svals = vals; 3438 for (i=0; i<m; i++) { 3439 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3440 smycols += ourlens[i]; 3441 svals += ourlens[i]; 3442 jj++; 3443 } 3444 3445 /* read in other processors and ship out */ 3446 for (i=1; i<size; i++) { 3447 nz = procsnz[i]; 3448 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3449 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3450 } 3451 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3452 } else { 3453 /* receive numeric values */ 3454 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3455 3456 /* receive message of values*/ 3457 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3458 3459 /* insert into matrix */ 3460 jj = rstart; 3461 smycols = mycols; 3462 svals = vals; 3463 for (i=0; i<m; i++) { 3464 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3465 smycols += ourlens[i]; 3466 svals += ourlens[i]; 3467 jj++; 3468 } 3469 } 3470 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3471 ierr = PetscFree(vals);CHKERRQ(ierr); 3472 ierr = PetscFree(mycols);CHKERRQ(ierr); 3473 ierr = PetscFree(rowners);CHKERRQ(ierr); 3474 3475 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3476 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3477 PetscFunctionReturn(0); 3478 } 3479 3480 #undef __FUNCT__ 3481 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3482 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3483 { 3484 PetscErrorCode ierr; 3485 IS iscol_local; 3486 PetscInt csize; 3487 3488 PetscFunctionBegin; 3489 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3490 if (call == MAT_REUSE_MATRIX) { 3491 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3492 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3493 } else { 3494 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3495 } 3496 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3497 if (call == MAT_INITIAL_MATRIX) { 3498 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3499 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3500 } 3501 PetscFunctionReturn(0); 3502 } 3503 3504 #undef __FUNCT__ 3505 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3506 /* 3507 Not great since it makes two copies of the submatrix, first an SeqAIJ 3508 in local and then by concatenating the local matrices the end result. 3509 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3510 3511 Note: This requires a sequential iscol with all indices. 
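   The caller, MatGetSubMatrix_MPIAIJ() above, builds such an iscol by applying ISAllGather()
   to the distributed column index set and caches the result on the new matrix under the name
   "ISAllGather" so a later MAT_REUSE_MATRIX call can recover it.  A sketch of the user-level
   calling sequence (error checking omitted):

      Mat sub;
      MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);    first extraction
      ...
      MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);      same index sets, reuse sub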
3512 */ 3513 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3514 { 3515 PetscErrorCode ierr; 3516 PetscMPIInt rank,size; 3517 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3518 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3519 Mat *local,M,Mreuse; 3520 MatScalar *vwork,*aa; 3521 MPI_Comm comm = ((PetscObject)mat)->comm; 3522 Mat_SeqAIJ *aij; 3523 3524 3525 PetscFunctionBegin; 3526 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3527 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3528 3529 if (call == MAT_REUSE_MATRIX) { 3530 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3531 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3532 local = &Mreuse; 3533 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3534 } else { 3535 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3536 Mreuse = *local; 3537 ierr = PetscFree(local);CHKERRQ(ierr); 3538 } 3539 3540 /* 3541 m - number of local rows 3542 n - number of columns (same on all processors) 3543 rstart - first row in new global matrix generated 3544 */ 3545 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3546 if (call == MAT_INITIAL_MATRIX) { 3547 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3548 ii = aij->i; 3549 jj = aij->j; 3550 3551 /* 3552 Determine the number of non-zeros in the diagonal and off-diagonal 3553 portions of the matrix in order to do correct preallocation 3554 */ 3555 3556 /* first get start and end of "diagonal" columns */ 3557 if (csize == PETSC_DECIDE) { 3558 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3559 if (mglobal == n) { /* square matrix */ 3560 nlocal = m; 3561 } else { 3562 nlocal = n/size + ((n % size) > rank); 3563 } 3564 } else { 3565 nlocal = csize; 3566 } 3567 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3568 rstart = rend - nlocal; 3569 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3570 3571 /* next, compute all the lengths */ 3572 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3573 olens = dlens + m; 3574 for (i=0; i<m; i++) { 3575 jend = ii[i+1] - ii[i]; 3576 olen = 0; 3577 dlen = 0; 3578 for (j=0; j<jend; j++) { 3579 if (*jj < rstart || *jj >= rend) olen++; 3580 else dlen++; 3581 jj++; 3582 } 3583 olens[i] = olen; 3584 dlens[i] = dlen; 3585 } 3586 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3587 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3588 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3589 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3590 ierr = PetscFree(dlens);CHKERRQ(ierr); 3591 } else { 3592 PetscInt ml,nl; 3593 3594 M = *newmat; 3595 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3596 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3597 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3598 /* 3599 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3600 rather than the slower MatSetValues(). 
3601 */ 3602 M->was_assembled = PETSC_TRUE; 3603 M->assembled = PETSC_FALSE; 3604 } 3605 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3606 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3607 ii = aij->i; 3608 jj = aij->j; 3609 aa = aij->a; 3610 for (i=0; i<m; i++) { 3611 row = rstart + i; 3612 nz = ii[i+1] - ii[i]; 3613 cwork = jj; jj += nz; 3614 vwork = aa; aa += nz; 3615 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3616 } 3617 3618 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3619 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3620 *newmat = M; 3621 3622 /* save submatrix used in processor for next request */ 3623 if (call == MAT_INITIAL_MATRIX) { 3624 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3625 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3626 } 3627 3628 PetscFunctionReturn(0); 3629 } 3630 3631 EXTERN_C_BEGIN 3632 #undef __FUNCT__ 3633 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3634 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3635 { 3636 PetscInt m,cstart, cend,j,nnz,i,d; 3637 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3638 const PetscInt *JJ; 3639 PetscScalar *values; 3640 PetscErrorCode ierr; 3641 3642 PetscFunctionBegin; 3643 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3644 3645 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3646 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3647 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3648 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3649 m = B->rmap->n; 3650 cstart = B->cmap->rstart; 3651 cend = B->cmap->rend; 3652 rstart = B->rmap->rstart; 3653 3654 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3655 3656 #if defined(PETSC_USE_DEBUG) 3657 for (i=0; i<m; i++) { 3658 nnz = Ii[i+1]- Ii[i]; 3659 JJ = J + Ii[i]; 3660 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3661 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3662 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3663 } 3664 #endif 3665 3666 for (i=0; i<m; i++) { 3667 nnz = Ii[i+1]- Ii[i]; 3668 JJ = J + Ii[i]; 3669 nnz_max = PetscMax(nnz_max,nnz); 3670 d = 0; 3671 for (j=0; j<nnz; j++) { 3672 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3673 } 3674 d_nnz[i] = d; 3675 o_nnz[i] = nnz - d; 3676 } 3677 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3678 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3679 3680 if (v) values = (PetscScalar*)v; 3681 else { 3682 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3683 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3684 } 3685 3686 for (i=0; i<m; i++) { 3687 ii = i + rstart; 3688 nnz = Ii[i+1]- Ii[i]; 3689 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3690 } 3691 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3692 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3693 3694 if (!v) { 3695 ierr = PetscFree(values);CHKERRQ(ierr); 3696 } 3697 PetscFunctionReturn(0); 3698 } 3699 EXTERN_C_END 3700 3701 #undef __FUNCT__ 3702 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3703 /*@ 3704 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3705 (the default parallel PETSc format). 3706 3707 Collective on MPI_Comm 3708 3709 Input Parameters: 3710 + B - the matrix 3711 . i - the indices into j for the start of each local row (starts with zero) 3712 . j - the column indices for each local row (starts with zero) 3713 - v - optional values in the matrix 3714 3715 Level: developer 3716 3717 Notes: 3718 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3719 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3720 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3721 3722 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3723 3724 The format which is used for the sparse matrix input, is equivalent to a 3725 row-major ordering.. i.e for the following matrix, the input data expected is 3726 as shown: 3727 3728 1 0 0 3729 2 0 3 P0 3730 ------- 3731 4 5 6 P1 3732 3733 Process0 [P0]: rows_owned=[0,1] 3734 i = {0,1,3} [size = nrow+1 = 2+1] 3735 j = {0,0,2} [size = nz = 6] 3736 v = {1,2,3} [size = nz = 6] 3737 3738 Process1 [P1]: rows_owned=[2] 3739 i = {0,3} [size = nrow+1 = 1+1] 3740 j = {0,1,2} [size = nz = 6] 3741 v = {4,5,6} [size = nz = 6] 3742 3743 .keywords: matrix, aij, compressed row, sparse, parallel 3744 3745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3746 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3747 @*/ 3748 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3749 { 3750 PetscErrorCode ierr; 3751 3752 PetscFunctionBegin; 3753 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3754 PetscFunctionReturn(0); 3755 } 3756 3757 #undef __FUNCT__ 3758 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3759 /*@C 3760 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3761 (the default parallel PETSc format). For good matrix assembly performance 3762 the user should preallocate the matrix storage by setting the parameters 3763 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3764 performance can be increased by more than a factor of 50. 3765 3766 Collective on MPI_Comm 3767 3768 Input Parameters: 3769 + A - the matrix 3770 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3771 (same value is used for all local rows) 3772 . d_nnz - array containing the number of nonzeros in the various rows of the 3773 DIAGONAL portion of the local submatrix (possibly different for each row) 3774 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3775 The size of this array is equal to the number of local rows, i.e 'm'. 3776 You must leave room for the diagonal entry even if it is zero. 3777 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3778 submatrix (same value is used for all local rows). 3779 - o_nnz - array containing the number of nonzeros in the various rows of the 3780 OFF-DIAGONAL portion of the local submatrix (possibly different for 3781 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3782 structure. The size of this array is equal to the number 3783 of local rows, i.e 'm'. 3784 3785 If the *_nnz parameter is given then the *_nz parameter is ignored 3786 3787 The AIJ format (also called the Yale sparse matrix format or 3788 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3789 storage. The stored row and column indices begin with zero. 3790 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3791 3792 The parallel matrix is partitioned such that the first m0 rows belong to 3793 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3794 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3795 3796 The DIAGONAL portion of the local submatrix of a processor can be defined 3797 as the submatrix which is obtained by extraction the part corresponding to 3798 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3799 first row that belongs to the processor, r2 is the last row belonging to 3800 the this processor, and c1-c2 is range of indices of the local part of a 3801 vector suitable for applying the matrix to. This is an mxn matrix. In the 3802 common case of a square matrix, the row and column ranges are the same and 3803 the DIAGONAL part is also square. The remaining portion of the local 3804 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3805 3806 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3807 3808 You can call MatGetInfo() to get information on how effective the preallocation was; 3809 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3810 You can also run with the option -info and look for messages with the string 3811 malloc in them to see if additional memory allocation was needed. 3812 3813 Example usage: 3814 3815 Consider the following 8x8 matrix with 34 non-zero values, that is 3816 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3817 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3818 as follows: 3819 3820 .vb 3821 1 2 0 | 0 3 0 | 0 4 3822 Proc0 0 5 6 | 7 0 0 | 8 0 3823 9 0 10 | 11 0 0 | 12 0 3824 ------------------------------------- 3825 13 0 14 | 15 16 17 | 0 0 3826 Proc1 0 18 0 | 19 20 21 | 0 0 3827 0 0 0 | 22 23 0 | 24 0 3828 ------------------------------------- 3829 Proc2 25 26 27 | 0 0 28 | 29 0 3830 30 0 0 | 31 32 33 | 0 34 3831 .ve 3832 3833 This can be represented as a collection of submatrices as: 3834 3835 .vb 3836 A B C 3837 D E F 3838 G H I 3839 .ve 3840 3841 Where the submatrices A,B,C are owned by proc0, D,E,F are 3842 owned by proc1, G,H,I are owned by proc2. 3843 3844 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3845 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3846 The 'M','N' parameters are 8,8, and have the same values on all procs. 3847 3848 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3849 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3850 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3851 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3852 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3853 matrix, ans [DF] as another SeqAIJ matrix. 3854 3855 When d_nz, o_nz parameters are specified, d_nz storage elements are 3856 allocated for every row of the local diagonal submatrix, and o_nz 3857 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3858 One way to choose d_nz and o_nz is to use the max nonzerors per local 3859 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3860 In this case, the values of d_nz,o_nz are: 3861 .vb 3862 proc0 : dnz = 2, o_nz = 2 3863 proc1 : dnz = 3, o_nz = 2 3864 proc2 : dnz = 1, o_nz = 4 3865 .ve 3866 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3867 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3868 for proc3. i.e we are using 12+15+10=37 storage locations to store 3869 34 values. 3870 3871 When d_nnz, o_nnz parameters are specified, the storage is specified 3872 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3873 In the above case the values for d_nnz,o_nnz are: 3874 .vb 3875 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3876 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3877 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3878 .ve 3879 Here the space allocated is sum of all the above values i.e 34, and 3880 hence pre-allocation is perfect. 3881 3882 Level: intermediate 3883 3884 .keywords: matrix, aij, compressed row, sparse, parallel 3885 3886 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3887 MPIAIJ, MatGetInfo() 3888 @*/ 3889 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3890 { 3891 PetscErrorCode ierr; 3892 3893 PetscFunctionBegin; 3894 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3895 PetscFunctionReturn(0); 3896 } 3897 3898 #undef __FUNCT__ 3899 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3900 /*@ 3901 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3902 CSR format the local rows. 3903 3904 Collective on MPI_Comm 3905 3906 Input Parameters: 3907 + comm - MPI communicator 3908 . m - number of local rows (Cannot be PETSC_DECIDE) 3909 . n - This value should be the same as the local size used in creating the 3910 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3911 calculated if N is given) For square matrices n is almost always m. 3912 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3913 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3914 . i - row indices 3915 . j - column indices 3916 - a - matrix values 3917 3918 Output Parameter: 3919 . mat - the matrix 3920 3921 Level: intermediate 3922 3923 Notes: 3924 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3925 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3926 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3927 3928 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3929 3930 The format which is used for the sparse matrix input, is equivalent to a 3931 row-major ordering.. 
i.e for the following matrix, the input data expected is 3932 as shown: 3933 3934 1 0 0 3935 2 0 3 P0 3936 ------- 3937 4 5 6 P1 3938 3939 Process0 [P0]: rows_owned=[0,1] 3940 i = {0,1,3} [size = nrow+1 = 2+1] 3941 j = {0,0,2} [size = nz = 6] 3942 v = {1,2,3} [size = nz = 6] 3943 3944 Process1 [P1]: rows_owned=[2] 3945 i = {0,3} [size = nrow+1 = 1+1] 3946 j = {0,1,2} [size = nz = 6] 3947 v = {4,5,6} [size = nz = 6] 3948 3949 .keywords: matrix, aij, compressed row, sparse, parallel 3950 3951 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3952 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3953 @*/ 3954 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3955 { 3956 PetscErrorCode ierr; 3957 3958 PetscFunctionBegin; 3959 if (i[0]) { 3960 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3961 } 3962 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3963 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3964 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3965 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3966 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3967 PetscFunctionReturn(0); 3968 } 3969 3970 #undef __FUNCT__ 3971 #define __FUNCT__ "MatCreateMPIAIJ" 3972 /*@C 3973 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3974 (the default parallel PETSc format). For good matrix assembly performance 3975 the user should preallocate the matrix storage by setting the parameters 3976 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3977 performance can be increased by more than a factor of 50. 3978 3979 Collective on MPI_Comm 3980 3981 Input Parameters: 3982 + comm - MPI communicator 3983 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3984 This value should be the same as the local size used in creating the 3985 y vector for the matrix-vector product y = Ax. 3986 . n - This value should be the same as the local size used in creating the 3987 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3988 calculated if N is given) For square matrices n is almost always m. 3989 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3990 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3991 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3992 (same value is used for all local rows) 3993 . d_nnz - array containing the number of nonzeros in the various rows of the 3994 DIAGONAL portion of the local submatrix (possibly different for each row) 3995 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3996 The size of this array is equal to the number of local rows, i.e 'm'. 3997 You must leave room for the diagonal entry even if it is zero. 3998 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3999 submatrix (same value is used for all local rows). 4000 - o_nnz - array containing the number of nonzeros in the various rows of the 4001 OFF-DIAGONAL portion of the local submatrix (possibly different for 4002 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4003 structure. The size of this array is equal to the number 4004 of local rows, i.e 'm'. 
4005 4006 Output Parameter: 4007 . A - the matrix 4008 4009 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4010 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4011 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4012 4013 Notes: 4014 If the *_nnz parameter is given then the *_nz parameter is ignored 4015 4016 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4017 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4018 storage requirements for this matrix. 4019 4020 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4021 processor than it must be used on all processors that share the object for 4022 that argument. 4023 4024 The user MUST specify either the local or global matrix dimensions 4025 (possibly both). 4026 4027 The parallel matrix is partitioned across processors such that the 4028 first m0 rows belong to process 0, the next m1 rows belong to 4029 process 1, the next m2 rows belong to process 2 etc.. where 4030 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4031 values corresponding to [m x N] submatrix. 4032 4033 The columns are logically partitioned with the n0 columns belonging 4034 to 0th partition, the next n1 columns belonging to the next 4035 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4036 4037 The DIAGONAL portion of the local submatrix on any given processor 4038 is the submatrix corresponding to the rows and columns m,n 4039 corresponding to the given processor. i.e diagonal matrix on 4040 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4041 etc. The remaining portion of the local submatrix [m x (N-n)] 4042 constitute the OFF-DIAGONAL portion. The example below better 4043 illustrates this concept. 4044 4045 For a square global matrix we define each processor's diagonal portion 4046 to be its local rows and the corresponding columns (a square submatrix); 4047 each processor's off-diagonal portion encompasses the remainder of the 4048 local matrix (a rectangular submatrix). 4049 4050 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4051 4052 When calling this routine with a single process communicator, a matrix of 4053 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4054 type of communicator, use the construction mechanism: 4055 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4056 4057 By default, this format uses inodes (identical nodes) when possible. 4058 We search for consecutive rows with the same nonzero structure, thereby 4059 reusing matrix information to achieve increased efficiency. 4060 4061 Options Database Keys: 4062 + -mat_no_inode - Do not use inodes 4063 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4064 - -mat_aij_oneindex - Internally use indexing starting at 1 4065 rather than 0. Note that when calling MatSetValues(), 4066 the user still MUST index entries starting at 0! 4067 4068 4069 Example usage: 4070 4071 Consider the following 8x8 matrix with 34 non-zero values, that is 4072 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4073 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4074 as follows: 4075 4076 .vb 4077 1 2 0 | 0 3 0 | 0 4 4078 Proc0 0 5 6 | 7 0 0 | 8 0 4079 9 0 10 | 11 0 0 | 12 0 4080 ------------------------------------- 4081 13 0 14 | 15 16 17 | 0 0 4082 Proc1 0 18 0 | 19 20 21 | 0 0 4083 0 0 0 | 22 23 0 | 24 0 4084 ------------------------------------- 4085 Proc2 25 26 27 | 0 0 28 | 29 0 4086 30 0 0 | 31 32 33 | 0 34 4087 .ve 4088 4089 This can be represented as a collection of submatrices as: 4090 4091 .vb 4092 A B C 4093 D E F 4094 G H I 4095 .ve 4096 4097 Where the submatrices A,B,C are owned by proc0, D,E,F are 4098 owned by proc1, G,H,I are owned by proc2. 4099 4100 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4101 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4102 The 'M','N' parameters are 8,8, and have the same values on all procs. 4103 4104 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4105 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4106 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4107 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4108 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4109 matrix, ans [DF] as another SeqAIJ matrix. 4110 4111 When d_nz, o_nz parameters are specified, d_nz storage elements are 4112 allocated for every row of the local diagonal submatrix, and o_nz 4113 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4114 One way to choose d_nz and o_nz is to use the max nonzerors per local 4115 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4116 In this case, the values of d_nz,o_nz are: 4117 .vb 4118 proc0 : dnz = 2, o_nz = 2 4119 proc1 : dnz = 3, o_nz = 2 4120 proc2 : dnz = 1, o_nz = 4 4121 .ve 4122 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4123 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4124 for proc3. i.e we are using 12+15+10=37 storage locations to store 4125 34 values. 4126 4127 When d_nnz, o_nnz parameters are specified, the storage is specified 4128 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4129 In the above case the values for d_nnz,o_nnz are: 4130 .vb 4131 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4132 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4133 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4134 .ve 4135 Here the space allocated is sum of all the above values i.e 34, and 4136 hence pre-allocation is perfect. 
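   As a concrete sketch (using the d_nnz/o_nnz values above; error checking omitted), the call
   made on proc1 of this example would be
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve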
4137 4138 Level: intermediate 4139 4140 .keywords: matrix, aij, compressed row, sparse, parallel 4141 4142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4143 MPIAIJ, MatCreateMPIAIJWithArrays() 4144 @*/ 4145 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4146 { 4147 PetscErrorCode ierr; 4148 PetscMPIInt size; 4149 4150 PetscFunctionBegin; 4151 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4152 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4153 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4154 if (size > 1) { 4155 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4156 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4157 } else { 4158 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4159 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4160 } 4161 PetscFunctionReturn(0); 4162 } 4163 4164 #undef __FUNCT__ 4165 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4166 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4167 { 4168 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4169 4170 PetscFunctionBegin; 4171 *Ad = a->A; 4172 *Ao = a->B; 4173 *colmap = a->garray; 4174 PetscFunctionReturn(0); 4175 } 4176 4177 #undef __FUNCT__ 4178 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4179 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4180 { 4181 PetscErrorCode ierr; 4182 PetscInt i; 4183 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4184 4185 PetscFunctionBegin; 4186 if (coloring->ctype == IS_COLORING_GLOBAL) { 4187 ISColoringValue *allcolors,*colors; 4188 ISColoring ocoloring; 4189 4190 /* set coloring for diagonal portion */ 4191 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4192 4193 /* set coloring for off-diagonal portion */ 4194 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4195 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4196 for (i=0; i<a->B->cmap->n; i++) { 4197 colors[i] = allcolors[a->garray[i]]; 4198 } 4199 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4200 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4201 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4202 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4203 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4204 ISColoringValue *colors; 4205 PetscInt *larray; 4206 ISColoring ocoloring; 4207 4208 /* set coloring for diagonal portion */ 4209 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4210 for (i=0; i<a->A->cmap->n; i++) { 4211 larray[i] = i + A->cmap->rstart; 4212 } 4213 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4214 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4215 for (i=0; i<a->A->cmap->n; i++) { 4216 colors[i] = coloring->colors[larray[i]]; 4217 } 4218 ierr = PetscFree(larray);CHKERRQ(ierr); 4219 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4220 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4221 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4222 4223 /* set coloring for off-diagonal portion */ 4224 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4225 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4226 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4227 for (i=0; i<a->B->cmap->n; i++) { 4228 colors[i] = coloring->colors[larray[i]]; 4229 } 4230 ierr = PetscFree(larray);CHKERRQ(ierr); 4231 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4232 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4233 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4234 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4235 4236 PetscFunctionReturn(0); 4237 } 4238 4239 #if defined(PETSC_HAVE_ADIC) 4240 #undef __FUNCT__ 4241 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4242 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4243 { 4244 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4245 PetscErrorCode ierr; 4246 4247 PetscFunctionBegin; 4248 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4249 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4250 PetscFunctionReturn(0); 4251 } 4252 #endif 4253 4254 #undef __FUNCT__ 4255 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4256 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4257 { 4258 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4259 PetscErrorCode ierr; 4260 4261 PetscFunctionBegin; 4262 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4263 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4264 PetscFunctionReturn(0); 4265 } 4266 4267 #undef __FUNCT__ 4268 #define __FUNCT__ "MatMerge" 4269 /*@ 4270 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4271 matrices from each processor 4272 4273 Collective on MPI_Comm 4274 4275 Input Parameters: 4276 + comm - the communicators the parallel matrix will live on 4277 . inmat - the input sequential matrices 4278 . n - number of local columns (or PETSC_DECIDE) 4279 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4280 4281 Output Parameter: 4282 . outmat - the parallel matrix generated 4283 4284 Level: advanced 4285 4286 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
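   The input matrix inmat is destroyed by this routine after its rows have been copied into outmat.

   A usage sketch (error checking omitted):
.vb
     Mat seqA;      each process holds its own MATSEQAIJ piece
     Mat A;
     MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);
.ve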
4287 4288 @*/ 4289 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4290 { 4291 PetscErrorCode ierr; 4292 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4293 PetscInt *indx; 4294 PetscScalar *values; 4295 4296 PetscFunctionBegin; 4297 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4298 if (scall == MAT_INITIAL_MATRIX){ 4299 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4300 if (n == PETSC_DECIDE){ 4301 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4302 } 4303 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4304 rstart -= m; 4305 4306 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4307 for (i=0;i<m;i++) { 4308 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4309 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4310 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4311 } 4312 /* This routine will ONLY return MPIAIJ type matrix */ 4313 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4314 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4315 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4316 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4317 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4318 4319 } else if (scall == MAT_REUSE_MATRIX){ 4320 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4321 } else { 4322 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4323 } 4324 4325 for (i=0;i<m;i++) { 4326 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4327 Ii = i + rstart; 4328 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4329 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4330 } 4331 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4332 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4333 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4334 4335 PetscFunctionReturn(0); 4336 } 4337 4338 #undef __FUNCT__ 4339 #define __FUNCT__ "MatFileSplit" 4340 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4341 { 4342 PetscErrorCode ierr; 4343 PetscMPIInt rank; 4344 PetscInt m,N,i,rstart,nnz; 4345 size_t len; 4346 const PetscInt *indx; 4347 PetscViewer out; 4348 char *name; 4349 Mat B; 4350 const PetscScalar *values; 4351 4352 PetscFunctionBegin; 4353 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4354 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4355 /* Should this be the type of the diagonal block of A? 
*/ 4356 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4357 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4358 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4359 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4360 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4361 for (i=0;i<m;i++) { 4362 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4363 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4364 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4365 } 4366 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4367 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4368 4369 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4370 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4371 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4372 sprintf(name,"%s.%d",outfile,rank); 4373 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4374 ierr = PetscFree(name); 4375 ierr = MatView(B,out);CHKERRQ(ierr); 4376 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4377 ierr = MatDestroy(&B);CHKERRQ(ierr); 4378 PetscFunctionReturn(0); 4379 } 4380 4381 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4382 #undef __FUNCT__ 4383 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4384 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4385 { 4386 PetscErrorCode ierr; 4387 Mat_Merge_SeqsToMPI *merge; 4388 PetscContainer container; 4389 4390 PetscFunctionBegin; 4391 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4392 if (container) { 4393 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4394 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4395 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4396 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4397 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4398 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4399 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4400 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4401 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4402 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4403 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4404 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4405 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4406 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4407 ierr = PetscFree(merge);CHKERRQ(ierr); 4408 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4409 } 4410 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4411 PetscFunctionReturn(0); 4412 } 4413 4414 #include <../src/mat/utils/freespace.h> 4415 #include <petscbt.h> 4416 4417 #undef __FUNCT__ 4418 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4419 /*@C 4420 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4421 matrices from each processor 4422 4423 Collective on MPI_Comm 4424 4425 Input Parameters: 4426 + comm - the communicators the parallel matrix will live on 4427 . seqmat - the input sequential matrices 4428 . m - number of local rows (or PETSC_DECIDE) 4429 . n - number of local columns (or PETSC_DECIDE) 4430 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4431 4432 Output Parameter: 4433 . mpimat - the parallel matrix generated 4434 4435 Level: advanced 4436 4437 Notes: 4438 The dimensions of the sequential matrix in each processor MUST be the same. 4439 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4440 destroyed when mpimat is destroyed. 
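   The container is attached to mpimat under the key "MatMergeSeqsToMPI"; a sketch of recovering it:
.vb
     PetscContainer      container;
     Mat_Merge_SeqsToMPI *merge;
     PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
     PetscContainerGetPointer(container,(void**)&merge);
.ve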
Call PetscObjectQuery() to access seqmat. 4441 @*/ 4442 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4443 { 4444 PetscErrorCode ierr; 4445 MPI_Comm comm=((PetscObject)mpimat)->comm; 4446 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4447 PetscMPIInt size,rank,taga,*len_s; 4448 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4449 PetscInt proc,m; 4450 PetscInt **buf_ri,**buf_rj; 4451 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4452 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4453 MPI_Request *s_waits,*r_waits; 4454 MPI_Status *status; 4455 MatScalar *aa=a->a; 4456 MatScalar **abuf_r,*ba_i; 4457 Mat_Merge_SeqsToMPI *merge; 4458 PetscContainer container; 4459 4460 PetscFunctionBegin; 4461 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4462 4463 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4464 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4465 4466 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4467 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4468 4469 bi = merge->bi; 4470 bj = merge->bj; 4471 buf_ri = merge->buf_ri; 4472 buf_rj = merge->buf_rj; 4473 4474 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4475 owners = merge->rowmap->range; 4476 len_s = merge->len_s; 4477 4478 /* send and recv matrix values */ 4479 /*-----------------------------*/ 4480 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4481 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4482 4483 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4484 for (proc=0,k=0; proc<size; proc++){ 4485 if (!len_s[proc]) continue; 4486 i = owners[proc]; 4487 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4488 k++; 4489 } 4490 4491 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4492 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4493 ierr = PetscFree(status);CHKERRQ(ierr); 4494 4495 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4496 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4497 4498 /* insert mat values of mpimat */ 4499 /*----------------------------*/ 4500 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4501 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4502 4503 for (k=0; k<merge->nrecv; k++){ 4504 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4505 nrows = *(buf_ri_k[k]); 4506 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4507 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4508 } 4509 4510 /* set values of ba */ 4511 m = merge->rowmap->n; 4512 for (i=0; i<m; i++) { 4513 arow = owners[rank] + i; 4514 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4515 bnzi = bi[i+1] - bi[i]; 4516 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4517 4518 /* add local non-zero vals of this proc's seqmat into ba */ 4519 anzi = ai[arow+1] - ai[arow]; 4520 aj = a->j + ai[arow]; 4521 aa = a->a + ai[arow]; 4522 nextaj = 0; 4523 for (j=0; nextaj<anzi; j++){ 4524 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4525 ba_i[j] += aa[nextaj++]; 4526 } 4527 } 4528 4529 /* add received vals into ba */ 4530 for (k=0; k<merge->nrecv; k++){ /* k-th 
received message */ 4531 /* i-th row */ 4532 if (i == *nextrow[k]) { 4533 anzi = *(nextai[k]+1) - *nextai[k]; 4534 aj = buf_rj[k] + *(nextai[k]); 4535 aa = abuf_r[k] + *(nextai[k]); 4536 nextaj = 0; 4537 for (j=0; nextaj<anzi; j++){ 4538 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4539 ba_i[j] += aa[nextaj++]; 4540 } 4541 } 4542 nextrow[k]++; nextai[k]++; 4543 } 4544 } 4545 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4546 } 4547 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4548 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4549 4550 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4551 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4552 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4553 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4554 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4555 PetscFunctionReturn(0); 4556 } 4557 4558 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4559 4560 #undef __FUNCT__ 4561 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4562 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4563 { 4564 PetscErrorCode ierr; 4565 Mat B_mpi; 4566 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4567 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4568 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4569 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4570 PetscInt len,proc,*dnz,*onz; 4571 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4572 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4573 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4574 MPI_Status *status; 4575 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4576 PetscBT lnkbt; 4577 Mat_Merge_SeqsToMPI *merge; 4578 PetscContainer container; 4579 4580 PetscFunctionBegin; 4581 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4582 4583 /* make sure it is a PETSc comm */ 4584 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4585 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4586 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4587 4588 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4589 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4590 4591 /* determine row ownership */ 4592 /*---------------------------------------------------------*/ 4593 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4594 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4595 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4596 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4597 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4598 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4599 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4600 4601 m = merge->rowmap->n; 4602 M = merge->rowmap->N; 4603 owners = merge->rowmap->range; 4604 4605 /* determine the number of messages to send, their lengths */ 4606 /*---------------------------------------------------------*/ 4607 len_s = merge->len_s; 4608 4609 len = 0; /* length of buf_si[] */ 4610 merge->nsend = 0; 4611 for (proc=0; proc<size; proc++){ 4612 len_si[proc] = 0; 4613 if (proc == rank){ 4614 len_s[proc] = 0; 4615 } else { 4616 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4617 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4618 } 4619 if (len_s[proc]) { 4620 
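/* a message to [proc] is needed: len_s[proc] counts the nonzero (j and a) entries to be sent there,
         while the matching i-structure message packed below carries a header (number of nonempty rows)
         plus, for each nonempty row, its row index and its i-structure offset, hence length 2*(nrows+1) */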
merge->nsend++; 4621 nrows = 0; 4622 for (i=owners[proc]; i<owners[proc+1]; i++){ 4623 if (ai[i+1] > ai[i]) nrows++; 4624 } 4625 len_si[proc] = 2*(nrows+1); 4626 len += len_si[proc]; 4627 } 4628 } 4629 4630 /* determine the number and length of messages to receive for ij-structure */ 4631 /*-------------------------------------------------------------------------*/ 4632 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4633 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4634 4635 /* post the Irecv of j-structure */ 4636 /*-------------------------------*/ 4637 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4638 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4639 4640 /* post the Isend of j-structure */ 4641 /*--------------------------------*/ 4642 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4643 4644 for (proc=0, k=0; proc<size; proc++){ 4645 if (!len_s[proc]) continue; 4646 i = owners[proc]; 4647 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4648 k++; 4649 } 4650 4651 /* receives and sends of j-structure are complete */ 4652 /*------------------------------------------------*/ 4653 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4654 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4655 4656 /* send and recv i-structure */ 4657 /*---------------------------*/ 4658 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4659 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4660 4661 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4662 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4663 for (proc=0,k=0; proc<size; proc++){ 4664 if (!len_s[proc]) continue; 4665 /* form outgoing message for i-structure: 4666 buf_si[0]: nrows to be sent 4667 [1:nrows]: row index (global) 4668 [nrows+1:2*nrows+1]: i-structure index 4669 */ 4670 /*-------------------------------------------*/ 4671 nrows = len_si[proc]/2 - 1; 4672 buf_si_i = buf_si + nrows+1; 4673 buf_si[0] = nrows; 4674 buf_si_i[0] = 0; 4675 nrows = 0; 4676 for (i=owners[proc]; i<owners[proc+1]; i++){ 4677 anzi = ai[i+1] - ai[i]; 4678 if (anzi) { 4679 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4680 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4681 nrows++; 4682 } 4683 } 4684 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4685 k++; 4686 buf_si += len_si[proc]; 4687 } 4688 4689 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4690 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4691 4692 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4693 for (i=0; i<merge->nrecv; i++){ 4694 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4695 } 4696 4697 ierr = PetscFree(len_si);CHKERRQ(ierr); 4698 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4699 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4700 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4701 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4702 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4703 ierr = 
PetscFree(status);CHKERRQ(ierr);
4704
4705   /* compute a local seq matrix in each processor */
4706   /*----------------------------------------------*/
4707   /* allocate bi array and free space for accumulating nonzero column info */
4708   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4709   bi[0] = 0;
4710
4711   /* create and initialize a linked list */
4712   nlnk = N+1;
4713   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4714
4715   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4716   len = 0;
4717   len = ai[owners[rank+1]] - ai[owners[rank]];
4718   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4719   current_space = free_space;
4720
4721   /* determine symbolic info for each local row */
4722   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4723
4724   for (k=0; k<merge->nrecv; k++){
4725     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4726     nrows = *buf_ri_k[k];
4727     nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4728     nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4729   }
4730
4731   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4732   len = 0;
4733   for (i=0;i<m;i++) {
4734     bnzi = 0;
4735     /* add local non-zero cols of this proc's seqmat into lnk */
4736     arow = owners[rank] + i;
4737     anzi = ai[arow+1] - ai[arow];
4738     aj = a->j + ai[arow];
4739     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4740     bnzi += nlnk;
4741     /* add received col data into lnk */
4742     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4743       if (i == *nextrow[k]) { /* i-th row */
4744         anzi = *(nextai[k]+1) - *nextai[k];
4745         aj = buf_rj[k] + *nextai[k];
4746         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4747         bnzi += nlnk;
4748         nextrow[k]++; nextai[k]++;
4749       }
4750     }
4751     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4752
4753     /* if free space is not available, make more free space */
4754     if (current_space->local_remaining<bnzi) {
4755       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4756       nspacedouble++;
4757     }
4758     /* copy data into free space, then initialize lnk */
4759     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4760     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4761
4762     current_space->array += bnzi;
4763     current_space->local_used += bnzi;
4764     current_space->local_remaining -= bnzi;
4765
4766     bi[i+1] = bi[i] + bnzi;
4767   }
4768
4769   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4770
4771   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4772   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4773   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4774
4775   /* create symbolic parallel matrix B_mpi */
4776   /*---------------------------------------*/
4777   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4778   if (n==PETSC_DECIDE) {
4779     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4780   } else {
4781     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4782   }
4783   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4784   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4785   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4786
4787   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4788 
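  /* the merge data set below (bi, bj, buf_ri, buf_rj) is stashed in a PetscContainer composed on B_mpi,
     so that MatMerge_SeqsToMPINumeric() can retrieve it with PetscObjectQuery() when it fills in the
     numerical values */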
B_mpi->assembled = PETSC_FALSE;
4789   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4790   merge->bi = bi;
4791   merge->bj = bj;
4792   merge->buf_ri = buf_ri;
4793   merge->buf_rj = buf_rj;
4794   merge->coi = PETSC_NULL;
4795   merge->coj = PETSC_NULL;
4796   merge->owners_co = PETSC_NULL;
4797
4798   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4799
4800   /* attach the supporting struct to B_mpi for reuse */
4801   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4802   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4803   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4804   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4805   *mpimat = B_mpi;
4806
4807   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4808   PetscFunctionReturn(0);
4809 }
4810
4811 #undef __FUNCT__
4812 #define __FUNCT__ "MatMerge_SeqsToMPI"
4813 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4814 {
4815   PetscErrorCode ierr;
4816
4817   PetscFunctionBegin;
4818   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4819   if (scall == MAT_INITIAL_MATRIX){
4820     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4821   }
4822   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4823   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4824   PetscFunctionReturn(0);
4825 }
4826
4827 #undef __FUNCT__
4828 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4829 /*@
4830     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with
4831       mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4832       with MatGetSize().
4833
4834     Not Collective
4835
4836    Input Parameters:
4837 +    A - the matrix
4838 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4839
4840    Output Parameter:
4841 .
A_loc - the local sequential matrix generated
4842
4843    Level: developer
4844
4845 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4846
4847 @*/
4848 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4849 {
4850   PetscErrorCode ierr;
4851   Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4852   Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4853   PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4854   MatScalar *aa=a->a,*ba=b->a,*cam;
4855   PetscScalar *ca;
4856   PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4857   PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4858   PetscBool match;
4859
4860   PetscFunctionBegin;
4861   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4862   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4863   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4864   if (scall == MAT_INITIAL_MATRIX){
4865     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4866     ci[0] = 0;
4867     for (i=0; i<am; i++){
4868       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4869     }
4870     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4871     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4872     k = 0;
4873     for (i=0; i<am; i++) {
4874       ncols_o = bi[i+1] - bi[i];
4875       ncols_d = ai[i+1] - ai[i];
4876       /* off-diagonal portion of A */
4877       for (jo=0; jo<ncols_o; jo++) {
4878         col = cmap[*bj];
4879         if (col >= cstart) break;
4880         cj[k] = col; bj++;
4881         ca[k++] = *ba++;
4882       }
4883       /* diagonal portion of A */
4884       for (j=0; j<ncols_d; j++) {
4885         cj[k] = cstart + *aj++;
4886         ca[k++] = *aa++;
4887       }
4888       /* off-diagonal portion of A */
4889       for (j=jo; j<ncols_o; j++) {
4890         cj[k] = cmap[*bj++];
4891         ca[k++] = *ba++;
4892       }
4893     }
4894     /* put together the new matrix */
4895     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4896     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4897     /* Since these are PETSc arrays, change flags to free them as necessary. */
4898     mat = (Mat_SeqAIJ*)(*A_loc)->data;
4899     mat->free_a = PETSC_TRUE;
4900     mat->free_ij = PETSC_TRUE;
4901     mat->nonew = 0;
4902   } else if (scall == MAT_REUSE_MATRIX){
4903     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4904     ci = mat->i; cj = mat->j; cam = mat->a;
4905     for (i=0; i<am; i++) {
4906       /* off-diagonal portion of A */
4907       ncols_o = bi[i+1] - bi[i];
4908       for (jo=0; jo<ncols_o; jo++) {
4909         col = cmap[*bj];
4910         if (col >= cstart) break;
4911         *cam++ = *ba++; bj++;
4912       }
4913       /* diagonal portion of A */
4914       ncols_d = ai[i+1] - ai[i];
4915       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4916       /* off-diagonal portion of A */
4917       for (j=jo; j<ncols_o; j++) {
4918         *cam++ = *ba++; bj++;
4919       }
4920     }
4921   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4922   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4923   PetscFunctionReturn(0);
4924 }
4925
4926 #undef __FUNCT__
4927 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4928 /*@C
4929     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4930
4931     Not Collective
4932
4933    Input Parameters:
4934 +    A - the matrix
4935 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4936 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4937
4938    Output Parameter:
4939 .
A_loc - the local sequential matrix generated
4940
4941    Level: developer
4942
4943 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4944
4945 @*/
4946 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4947 {
4948   Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4949   PetscErrorCode ierr;
4950   PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4951   IS isrowa,iscola;
4952   Mat *aloc;
4953   PetscBool match;
4954
4955   PetscFunctionBegin;
4956   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4957   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4958   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4959   if (!row){
4960     start = A->rmap->rstart; end = A->rmap->rend;
4961     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4962   } else {
4963     isrowa = *row;
4964   }
4965   if (!col){
4966     start = A->cmap->rstart;
4967     cmap = a->garray;
4968     nzA = a->A->cmap->n;
4969     nzB = a->B->cmap->n;
4970     ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4971     ncols = 0;
4972     for (i=0; i<nzB; i++) {
4973       if (cmap[i] < start) idx[ncols++] = cmap[i];
4974       else break;
4975     }
4976     imark = i;
4977     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4978     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4979     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4980   } else {
4981     iscola = *col;
4982   }
4983   if (scall != MAT_INITIAL_MATRIX){
4984     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4985     aloc[0] = *A_loc;
4986   }
4987   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4988   *A_loc = aloc[0];
4989   ierr = PetscFree(aloc);CHKERRQ(ierr);
4990   if (!row){
4991     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4992   }
4993   if (!col){
4994     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4995   }
4996   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4997   PetscFunctionReturn(0);
4998 }
4999
5000 #undef __FUNCT__
5001 #define __FUNCT__ "MatGetBrowsOfAcols"
5002 /*@C
5003     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5004
5005     Collective on Mat
5006
5007    Input Parameters:
5008 +    A,B - the matrices in mpiaij format
5009 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5010 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5011
5012    Output Parameters:
5013 +    rowb, colb - index sets of rows and columns of B to extract
5014 .
brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
5015 -    B_seq - the sequential matrix generated
5016
5017    Level: developer
5018
5019 @*/
5020 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
5021 {
5022   Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5023   PetscErrorCode ierr;
5024   PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5025   IS isrowb,iscolb;
5026   Mat *bseq;
5027
5028   PetscFunctionBegin;
5029   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5030     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5031   }
5032   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5033
5034   if (scall == MAT_INITIAL_MATRIX){
5035     start = A->cmap->rstart;
5036     cmap = a->garray;
5037     nzA = a->A->cmap->n;
5038     nzB = a->B->cmap->n;
5039     ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5040     ncols = 0;
5041     for (i=0; i<nzB; i++) {  /* row < local row index */
5042       if (cmap[i] < start) idx[ncols++] = cmap[i];
5043       else break;
5044     }
5045     imark = i;
5046     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5047     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5048     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5049     *brstart = imark;
5050     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5051   } else {
5052     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5053     isrowb = *rowb; iscolb = *colb;
5054     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5055     bseq[0] = *B_seq;
5056   }
5057   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5058   *B_seq = bseq[0];
5059   ierr = PetscFree(bseq);CHKERRQ(ierr);
5060   if (!rowb){
5061     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5062   } else {
5063     *rowb = isrowb;
5064   }
5065   if (!colb){
5066     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5067   } else {
5068     *colb = iscolb;
5069   }
5070   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5071   PetscFunctionReturn(0);
5072 }
5073
5074 #undef __FUNCT__
5075 #define __FUNCT__ "MatGetBrowsOfAoCols"
5076 /*@C
5077     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5078     of the OFF-DIAGONAL portion of local A
5079
5080     Collective on Mat
5081
5082    Input Parameters:
5083 +    A,B - the matrices in mpiaij format
5084 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5085 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5086 .
startsj_r - similar to startsj for receives 5087 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5088 5089 Output Parameter: 5090 + B_oth - the sequential matrix generated 5091 5092 Level: developer 5093 5094 @*/ 5095 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5096 { 5097 VecScatter_MPI_General *gen_to,*gen_from; 5098 PetscErrorCode ierr; 5099 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5100 Mat_SeqAIJ *b_oth; 5101 VecScatter ctx=a->Mvctx; 5102 MPI_Comm comm=((PetscObject)ctx)->comm; 5103 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5104 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5105 PetscScalar *rvalues,*svalues; 5106 MatScalar *b_otha,*bufa,*bufA; 5107 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5108 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5109 MPI_Status *sstatus,rstatus; 5110 PetscMPIInt jj; 5111 PetscInt *cols,sbs,rbs; 5112 PetscScalar *vals; 5113 5114 PetscFunctionBegin; 5115 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5116 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5117 } 5118 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5119 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5120 5121 gen_to = (VecScatter_MPI_General*)ctx->todata; 5122 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5123 rvalues = gen_from->values; /* holds the length of receiving row */ 5124 svalues = gen_to->values; /* holds the length of sending row */ 5125 nrecvs = gen_from->n; 5126 nsends = gen_to->n; 5127 5128 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5129 srow = gen_to->indices; /* local row index to be sent */ 5130 sstarts = gen_to->starts; 5131 sprocs = gen_to->procs; 5132 sstatus = gen_to->sstatus; 5133 sbs = gen_to->bs; 5134 rstarts = gen_from->starts; 5135 rprocs = gen_from->procs; 5136 rbs = gen_from->bs; 5137 5138 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5139 if (scall == MAT_INITIAL_MATRIX){ 5140 /* i-array */ 5141 /*---------*/ 5142 /* post receives */ 5143 for (i=0; i<nrecvs; i++){ 5144 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5145 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5146 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5147 } 5148 5149 /* pack the outgoing message */ 5150 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5151 sstartsj[0] = 0; rstartsj[0] = 0; 5152 len = 0; /* total length of j or a array to be sent */ 5153 k = 0; 5154 for (i=0; i<nsends; i++){ 5155 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5156 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5157 for (j=0; j<nrows; j++) { 5158 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5159 for (l=0; l<sbs; l++){ 5160 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5161 rowlen[j*sbs+l] = ncols; 5162 len += ncols; 5163 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5164 } 5165 k++; 5166 } 5167 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5168 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and 
bufa */
5169   }
5170   /* recvs and sends of i-array are completed */
5171   i = nrecvs;
5172   while (i--) {
5173     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5174   }
5175   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5176
5177   /* allocate buffers for sending j and a arrays */
5178   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5179   ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5180
5181   /* create i-array of B_oth */
5182   ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5183   b_othi[0] = 0;
5184   len = 0; /* total length of j or a array to be received */
5185   k = 0;
5186   for (i=0; i<nrecvs; i++){
5187     rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5188     nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5189     for (j=0; j<nrows; j++) {
5190       b_othi[k+1] = b_othi[k] + rowlen[j];
5191       len += rowlen[j]; k++;
5192     }
5193     rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5194   }
5195
5196   /* allocate space for j and a arrays of B_oth */
5197   ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5198   ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5199
5200   /* j-array */
5201   /*---------*/
5202   /* post receives of j-array */
5203   for (i=0; i<nrecvs; i++){
5204     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5205     ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5206   }
5207
5208   /* pack the outgoing message j-array */
5209   k = 0;
5210   for (i=0; i<nsends; i++){
5211     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5212     bufJ = bufj+sstartsj[i];
5213     for (j=0; j<nrows; j++) {
5214       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5215       for (ll=0; ll<sbs; ll++){
5216         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5217         for (l=0; l<ncols; l++){
5218           *bufJ++ = cols[l];
5219         }
5220         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5221       }
5222     }
5223     ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5224   }
5225
5226   /* recvs and sends of j-array are completed */
5227   i = nrecvs;
5228   while (i--) {
5229     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5230   }
5231   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5232   } else if (scall == MAT_REUSE_MATRIX){
5233     sstartsj = *startsj;
5234     rstartsj = *startsj_r;
5235     bufa = *bufa_ptr;
5236     b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5237     b_otha = b_oth->a;
5238   } else {
5239     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value: only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are supported");
5240   }
5241
5242   /* a-array */
5243   /*---------*/
5244   /* post receives of a-array */
5245   for (i=0; i<nrecvs; i++){
5246     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5247     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5248   }
5249
5250   /* pack the outgoing message a-array */
5251   k = 0;
5252   for (i=0; i<nsends; i++){
5253     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5254     bufA = bufa+sstartsj[i];
5255     for (j=0; j<nrows; j++) {
5256       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5257       for (ll=0; ll<sbs; ll++){
5258         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5259         for (l=0; l<ncols; l++){
5260           *bufA++ = vals[l];
5261         }
5262         ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5263 } 5264 } 5265 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5266 } 5267 /* recvs and sends of a-array are completed */ 5268 i = nrecvs; 5269 while (i--) { 5270 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5271 } 5272 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5273 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5274 5275 if (scall == MAT_INITIAL_MATRIX){ 5276 /* put together the new matrix */ 5277 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5278 5279 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5280 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5281 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5282 b_oth->free_a = PETSC_TRUE; 5283 b_oth->free_ij = PETSC_TRUE; 5284 b_oth->nonew = 0; 5285 5286 ierr = PetscFree(bufj);CHKERRQ(ierr); 5287 if (!startsj || !bufa_ptr){ 5288 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5289 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5290 } else { 5291 *startsj = sstartsj; 5292 *startsj_r = rstartsj; 5293 *bufa_ptr = bufa; 5294 } 5295 } 5296 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5297 PetscFunctionReturn(0); 5298 } 5299 5300 #undef __FUNCT__ 5301 #define __FUNCT__ "MatGetCommunicationStructs" 5302 /*@C 5303 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5304 5305 Not Collective 5306 5307 Input Parameters: 5308 . A - The matrix in mpiaij format 5309 5310 Output Parameter: 5311 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5312 . 
colmap - A map from global column index to local index into lvec 5313 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5314 5315 Level: developer 5316 5317 @*/ 5318 #if defined (PETSC_USE_CTABLE) 5319 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5320 #else 5321 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5322 #endif 5323 { 5324 Mat_MPIAIJ *a; 5325 5326 PetscFunctionBegin; 5327 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5328 PetscValidPointer(lvec, 2); 5329 PetscValidPointer(colmap, 3); 5330 PetscValidPointer(multScatter, 4); 5331 a = (Mat_MPIAIJ *) A->data; 5332 if (lvec) *lvec = a->lvec; 5333 if (colmap) *colmap = a->colmap; 5334 if (multScatter) *multScatter = a->Mvctx; 5335 PetscFunctionReturn(0); 5336 } 5337 5338 EXTERN_C_BEGIN 5339 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5340 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5341 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5342 EXTERN_C_END 5343 5344 #undef __FUNCT__ 5345 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5346 /* 5347 Computes (B'*A')' since computing B*A directly is untenable 5348 5349 n p p 5350 ( ) ( ) ( ) 5351 m ( A ) * n ( B ) = m ( C ) 5352 ( ) ( ) ( ) 5353 5354 */ 5355 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5356 { 5357 PetscErrorCode ierr; 5358 Mat At,Bt,Ct; 5359 5360 PetscFunctionBegin; 5361 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5362 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5363 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5364 ierr = MatDestroy(&At);CHKERRQ(ierr); 5365 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5366 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5367 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5368 PetscFunctionReturn(0); 5369 } 5370 5371 #undef __FUNCT__ 5372 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5373 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5374 { 5375 PetscErrorCode ierr; 5376 PetscInt m=A->rmap->n,n=B->cmap->n; 5377 Mat Cmat; 5378 5379 PetscFunctionBegin; 5380 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5381 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5382 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5383 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5384 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5385 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5386 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5387 *C = Cmat; 5388 PetscFunctionReturn(0); 5389 } 5390 5391 /* ----------------------------------------------------------------*/ 5392 #undef __FUNCT__ 5393 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5394 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5395 { 5396 PetscErrorCode ierr; 5397 5398 PetscFunctionBegin; 5399 if (scall == MAT_INITIAL_MATRIX){ 5400 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5401 } 5402 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5403 PetscFunctionReturn(0); 5404 } 5405 5406 EXTERN_C_BEGIN 5407 #if defined(PETSC_HAVE_MUMPS) 5408 extern PetscErrorCode 
MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5409 #endif 5410 #if defined(PETSC_HAVE_PASTIX) 5411 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5412 #endif 5413 #if defined(PETSC_HAVE_SUPERLU_DIST) 5414 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5415 #endif 5416 #if defined(PETSC_HAVE_SPOOLES) 5417 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5418 #endif 5419 EXTERN_C_END 5420 5421 /*MC 5422 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5423 5424 Options Database Keys: 5425 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5426 5427 Level: beginner 5428 5429 .seealso: MatCreateMPIAIJ() 5430 M*/ 5431 5432 EXTERN_C_BEGIN 5433 #undef __FUNCT__ 5434 #define __FUNCT__ "MatCreate_MPIAIJ" 5435 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5436 { 5437 Mat_MPIAIJ *b; 5438 PetscErrorCode ierr; 5439 PetscMPIInt size; 5440 5441 PetscFunctionBegin; 5442 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5443 5444 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5445 B->data = (void*)b; 5446 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5447 B->rmap->bs = 1; 5448 B->assembled = PETSC_FALSE; 5449 5450 B->insertmode = NOT_SET_VALUES; 5451 b->size = size; 5452 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5453 5454 /* build cache for off array entries formed */ 5455 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5456 b->donotstash = PETSC_FALSE; 5457 b->colmap = 0; 5458 b->garray = 0; 5459 b->roworiented = PETSC_TRUE; 5460 5461 /* stuff used for matrix vector multiply */ 5462 b->lvec = PETSC_NULL; 5463 b->Mvctx = PETSC_NULL; 5464 5465 /* stuff for MatGetRow() */ 5466 b->rowindices = 0; 5467 b->rowvalues = 0; 5468 b->getrowactive = PETSC_FALSE; 5469 5470 #if defined(PETSC_HAVE_SPOOLES) 5471 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5472 "MatGetFactor_mpiaij_spooles", 5473 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5474 #endif 5475 #if defined(PETSC_HAVE_MUMPS) 5476 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5477 "MatGetFactor_aij_mumps", 5478 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5479 #endif 5480 #if defined(PETSC_HAVE_PASTIX) 5481 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5482 "MatGetFactor_mpiaij_pastix", 5483 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5484 #endif 5485 #if defined(PETSC_HAVE_SUPERLU_DIST) 5486 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5487 "MatGetFactor_mpiaij_superlu_dist", 5488 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5489 #endif 5490 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5491 "MatStoreValues_MPIAIJ", 5492 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5493 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5494 "MatRetrieveValues_MPIAIJ", 5495 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5496 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5497 "MatGetDiagonalBlock_MPIAIJ", 5498 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5499 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5500 "MatIsTranspose_MPIAIJ", 5501 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5502 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5503                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5504                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5505   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5506                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5507                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5509                                      "MatDiagonalScaleLocal_MPIAIJ",
5510                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5512                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5513                                      MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5515                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5516                                      MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5518                                      "MatConvert_MPIAIJ_MPISBAIJ",
5519                                      MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5521                                      "MatMatMult_MPIDense_MPIAIJ",
5522                                      MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5523   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5524                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5525                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5526   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5527                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5528                                      MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5529   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5530   PetscFunctionReturn(0);
5531 }
5532 EXTERN_C_END
5533
5534 #undef __FUNCT__
5535 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5536 /*@
5537     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5538       and "off-diagonal" part of the matrix in CSR format.
5539
5540    Collective on MPI_Comm
5541
5542    Input Parameters:
5543 +  comm - MPI communicator
5544 .  m - number of local rows (Cannot be PETSC_DECIDE)
5545 .  n - This value should be the same as the local size used in creating the
5546        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5547        calculated if N is given). For square matrices n is almost always m.
5548 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5549 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5550 .  i - row indices for "diagonal" portion of matrix
5551 .  j - column indices
5552 .  a - matrix values
5553 .  oi - row indices for "off-diagonal" portion of matrix
5554 .  oj - column indices
5555 -  oa - matrix values
5556
5557    Output Parameter:
5558 .  mat - the matrix
5559
5560    Level: advanced
5561
5562    Notes:
5563        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5564        must free the arrays once the matrix has been destroyed and not before.
5565
5566        The i and j indices are 0 based
5567
5568        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5569
5570        This sets local rows and cannot be used to set off-processor values.
5571
5572        You cannot later use MatSetValues() to change values in this matrix. 
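       Example usage (an illustrative sketch, not taken from the PETSc examples; it assumes two processes,
       with the calling process owning rows and columns 0 and 1 of a 4x4 matrix, and assumes comm and ierr
       are declared in the calling code; each process passes its own local arrays, which must remain
       allocated for the life of the matrix):
.vb
       PetscInt    i[]  = {0,1,2};   /* diagonal block: one entry in row 0 and one in row 1 */
       PetscInt    j[]  = {0,1};     /* column indices local to the diagonal block */
       PetscScalar a[]  = {1.0,2.0};
       PetscInt    oi[] = {0,1,1};   /* off-diagonal block: one entry in row 0, none in row 1 */
       PetscInt    oj[] = {2};       /* global column indices for the off-diagonal block */
       PetscScalar oa[] = {3.0};
       Mat         A;

       ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve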
5573 5574 .keywords: matrix, aij, compressed row, sparse, parallel 5575 5576 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5577 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5578 @*/ 5579 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5580 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5581 { 5582 PetscErrorCode ierr; 5583 Mat_MPIAIJ *maij; 5584 5585 PetscFunctionBegin; 5586 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5587 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5588 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5589 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5590 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5591 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5592 maij = (Mat_MPIAIJ*) (*mat)->data; 5593 maij->donotstash = PETSC_TRUE; 5594 (*mat)->preallocated = PETSC_TRUE; 5595 5596 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5597 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5598 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5599 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5600 5601 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5602 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5603 5604 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5605 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5606 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5607 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5608 5609 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5610 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5611 PetscFunctionReturn(0); 5612 } 5613 5614 /* 5615 Special version for direct calls from Fortran 5616 */ 5617 #include <private/fortranimpl.h> 5618 5619 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5620 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5621 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5622 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5623 #endif 5624 5625 /* Change these macros so can be used in void function */ 5626 #undef CHKERRQ 5627 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5628 #undef SETERRQ2 5629 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5630 #undef SETERRQ 5631 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5632 5633 EXTERN_C_BEGIN 5634 #undef __FUNCT__ 5635 #define __FUNCT__ "matsetvaluesmpiaij_" 5636 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5637 { 5638 Mat mat = *mmat; 5639 PetscInt m = *mm, n = *mn; 5640 InsertMode addv = *maddv; 5641 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5642 PetscScalar value; 5643 PetscErrorCode ierr; 5644 5645 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5646 if (mat->insertmode == NOT_SET_VALUES) { 5647 mat->insertmode = addv; 5648 } 5649 #if defined(PETSC_USE_DEBUG) 5650 else if (mat->insertmode != addv) { 5651 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5652 } 5653 #endif 5654 { 5655 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5656 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5657 PetscBool roworiented = aij->roworiented; 5658 5659 /* Some Variables required in the macro */ 5660 Mat A = aij->A; 5661 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5662 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5663 MatScalar *aa = a->a; 5664 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5665 Mat B = aij->B; 5666 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5667 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5668 MatScalar *ba = b->a; 5669 5670 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5671 PetscInt nonew = a->nonew; 5672 MatScalar *ap1,*ap2; 5673 5674 PetscFunctionBegin; 5675 for (i=0; i<m; i++) { 5676 if (im[i] < 0) continue; 5677 #if defined(PETSC_USE_DEBUG) 5678 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5679 #endif 5680 if (im[i] >= rstart && im[i] < rend) { 5681 row = im[i] - rstart; 5682 lastcol1 = -1; 5683 rp1 = aj + ai[row]; 5684 ap1 = aa + ai[row]; 5685 rmax1 = aimax[row]; 5686 nrow1 = ailen[row]; 5687 low1 = 0; 5688 high1 = nrow1; 5689 lastcol2 = -1; 5690 rp2 = bj + bi[row]; 5691 ap2 = ba + bi[row]; 5692 rmax2 = bimax[row]; 5693 nrow2 = bilen[row]; 5694 low2 = 0; 5695 high2 = nrow2; 5696 5697 for (j=0; j<n; j++) { 5698 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5699 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5700 if (in[j] >= cstart && in[j] < cend){ 5701 col = in[j] - cstart; 5702 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5703 } else if (in[j] < 0) continue; 5704 #if defined(PETSC_USE_DEBUG) 5705 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5706 #endif 5707 else { 5708 if (mat->was_assembled) { 5709 if (!aij->colmap) { 5710 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5711 } 5712 #if defined (PETSC_USE_CTABLE) 5713 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5714 col--; 5715 #else 5716 col = aij->colmap[in[j]] - 1; 5717 #endif 5718 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5719 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5720 col = in[j]; 5721 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5722 B = aij->B; 5723 b = (Mat_SeqAIJ*)B->data; 5724 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5725 rp2 = bj + bi[row]; 5726 ap2 = ba + bi[row]; 5727 rmax2 = bimax[row]; 5728 nrow2 = bilen[row]; 5729 low2 = 0; 5730 high2 = nrow2; 5731 bm = aij->B->rmap->n; 5732 ba = b->a; 5733 } 5734 } else col = in[j]; 5735 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5736 } 5737 } 5738 } else { 5739 if (!aij->donotstash) { 5740 if (roworiented) { 5741 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5742 } else { 5743 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5744 } 5745 } 5746 } 5747 }} 5748 PetscFunctionReturnVoid(); 5749 } 5750 EXTERN_C_END 5751 5752
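/*
   Illustrative usage sketch (not part of the library, added for exposition): the SeqsToMPI merge pair
   defined earlier in this file is meant to be driven through MatMerge_SeqsToMPI(), first with
   MAT_INITIAL_MATRIX to run the symbolic phase and cache the merge data on the result, then with
   MAT_REUSE_MATRIX to redo only the numeric phase after the local sequential matrix changes.  The
   routines BuildLocalSeqAIJ() and UpdateLocalSeqAIJValues() are hypothetical placeholders for
   application code, and mlocal/nlocal are the local row and column sizes chosen by the caller.

     Mat seqlocal,C;
     ierr = BuildLocalSeqAIJ(&seqlocal);CHKERRQ(ierr);                                    (hypothetical)
     ierr = MatMerge_SeqsToMPI(comm,seqlocal,mlocal,nlocal,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ierr = UpdateLocalSeqAIJValues(seqlocal);CHKERRQ(ierr);                              (hypothetical)
     ierr = MatMerge_SeqsToMPI(comm,seqlocal,mlocal,nlocal,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/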