1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray = 
aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 
PetscFunctionBegin; 432 /* code only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew = a->nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 506 } else if (in[j] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 509 #endif 510 else { 511 if (mat->was_assembled) { 512 if (!aij->colmap) { 513 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 517 col--; 518 #else 519 col = aij->colmap[in[j]] - 1; 520 #endif 521 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 col = in[j]; 524 /* Reinitialize the variables required by 
MatSetValues_SeqAIJ_B_Private() */ 525 B = aij->B; 526 b = (Mat_SeqAIJ*)B->data; 527 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 528 rp2 = bj + bi[row]; 529 ap2 = ba + bi[row]; 530 rmax2 = bimax[row]; 531 nrow2 = bilen[row]; 532 low2 = 0; 533 high2 = nrow2; 534 bm = aij->B->rmap->n; 535 ba = b->a; 536 } 537 } else col = in[j]; 538 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 539 } 540 } 541 } else { 542 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 543 if (!aij->donotstash) { 544 if (roworiented) { 545 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 546 } else { 547 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatGetValues_MPIAIJ" 557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 560 PetscErrorCode ierr; 561 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 562 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 563 564 PetscFunctionBegin; 565 for (i=0; i<m; i++) { 566 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 567 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 568 if (idxm[i] >= rstart && idxm[i] < rend) { 569 row = idxm[i] - rstart; 570 for (j=0; j<n; j++) { 571 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 572 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 573 if (idxn[j] >= cstart && idxn[j] < cend){ 574 col = idxn[j] - cstart; 575 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 576 } else { 577 if (!aij->colmap) { 578 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 579 } 580 #if defined (PETSC_USE_CTABLE) 581 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 582 col --; 583 #else 584 col = aij->colmap[idxn[j]] - 1; 585 #endif 586 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 587 else { 588 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else { 593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (aij->donotstash || mat->nooffprocentries) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) 
SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 618 mat->insertmode = addv; /* in case this processor had no cache */ 619 620 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 621 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 622 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 632 PetscErrorCode ierr; 633 PetscMPIInt n; 634 PetscInt i,j,rstart,ncols,flg; 635 PetscInt *row,*col; 636 PetscBool other_disassembled; 637 PetscScalar *val; 638 InsertMode addv = mat->insertmode; 639 640 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 641 PetscFunctionBegin; 642 if (!aij->donotstash && !mat->nooffprocentries) { 643 while (1) { 644 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 645 if (!flg) break; 646 647 for (i=0; i<n;) { 648 /* Now identify the consecutive vals belonging to the same row */ 649 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 650 if (j < n) ncols = j-i; 651 else ncols = n-i; 652 /* Now assemble all these values with a single function call */ 653 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 654 i = j; 655 } 656 } 657 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 658 } 659 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 660 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 661 662 /* determine if any processor has disassembled, if so we must 663 also disassemble ourselfs, in order that we may reassemble. 
*/ 664 /* 665 if nonzero structure of submatrix B cannot change then we know that 666 no processor disassembled thus we can skip this stuff 667 */ 668 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 669 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 670 if (mat->was_assembled && !other_disassembled) { 671 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 672 } 673 } 674 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 675 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 681 682 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 683 aij->rowvalues = 0; 684 685 /* used by MatAXPY() */ 686 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 687 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 688 689 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 690 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else 
{ 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 
839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 876 { 877 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 880 PetscInt i,*owners = A->rmap->range; 881 PetscInt *nprocs,j,idx,nsends; 882 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 883 PetscInt *rvalues,count,base,slen,*source; 884 PetscInt *lens,*lrows,*values,m; 885 MPI_Comm comm = ((PetscObject)A)->comm; 886 MPI_Request *send_waits,*recv_waits; 887 MPI_Status recv_status,*send_status; 888 const PetscScalar *xx; 889 PetscScalar *bb,*mask; 890 Vec xmask,lmask; 891 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 892 const PetscInt *aj, *ii,*ridx; 893 PetscScalar *aa; 894 #if defined(PETSC_DEBUG) 895 PetscBool found = PETSC_FALSE; 896 #endif 897 898 PetscFunctionBegin; 899 /* first count number of contributors to each processor */ 900 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 901 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 902 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 903 j = 0; 904 for (i=0; i<N; i++) { 905 if (lastidx > (idx = rows[i])) j = 0; 906 lastidx = idx; 907 for (; j<size; j++) { 908 if (idx >= owners[j] && idx < owners[j+1]) { 909 nprocs[2*j]++; 910 nprocs[2*j+1] = 1; 911 owner[i] = j; 912 #if defined(PETSC_DEBUG) 913 found = PETSC_TRUE; 914 #endif 915 break; 916 } 917 } 918 #if defined(PETSC_DEBUG) 919 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 920 found = PETSC_FALSE; 921 #endif 922 } 923 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 924 925 /* inform other processors of number of messages and max length*/ 926 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 927 928 /* post receives: */ 929 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 930 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 931 for (i=0; i<nrecvs; i++) { 932 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 933 } 934 935 /* do sends: 936 1) starts[i] gives the starting index in svalues for stuff going to 937 the ith processor 938 */ 939 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 940 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 941 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 942 starts[0] = 0; 943 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 944 for (i=0; i<N; i++) { 945 svalues[starts[owner[i]]++] = rows[i]; 946 } 947 948 starts[0] = 0; 949 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 950 count = 0; 951 for (i=0; i<size; i++) { 952 if (nprocs[2*i+1]) { 953 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 954 } 955 } 956 ierr = PetscFree(starts);CHKERRQ(ierr); 957 958 base = owners[rank]; 959 960 /* wait on receives */ 961 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 962 count = nrecvs; slen = 0; 963 while (count) { 964 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 965 /* unpack receives into our local space */ 966 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 967 source[imdex] = recv_status.MPI_SOURCE; 968 lens[imdex] = n; 969 slen += n; 970 count--; 971 } 972 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 973 974 /* move the data into the send scatter */ 975 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 976 count = 0; 977 for (i=0; i<nrecvs; i++) { 978 values = rvalues + i*nmax; 979 for (j=0; j<lens[i]; j++) { 980 lrows[count++] = values[j] - base; 981 } 982 } 983 ierr = PetscFree(rvalues);CHKERRQ(ierr); 984 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 985 ierr = PetscFree(owner);CHKERRQ(ierr); 986 ierr = PetscFree(nprocs);CHKERRQ(ierr); 987 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 988 989 /* zero diagonal part of matrix */ 990 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 991 992 /* handle off diagonal part of matrix */ 993 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 994 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 995 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 996 for (i=0; i<slen; i++) { 997 bb[lrows[i]] = 1; 998 } 999 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1003 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1006 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1007 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1008 1009 /* remove zeroed rows of off diagonal matrix */ 1010 ii = aij->i; 1011 for (i=0; i<slen; i++) { 1012 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1013 } 1014 1015 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1016 if (aij->compressedrow.use){ 1017 m = aij->compressedrow.nrows; 1018 ii = aij->compressedrow.i; 1019 
ridx = aij->compressedrow.rindex; 1020 for (i=0; i<m; i++){ 1021 n = ii[i+1] - ii[i]; 1022 aj = aij->j + ii[i]; 1023 aa = aij->a + ii[i]; 1024 1025 for (j=0; j<n; j++) { 1026 if (PetscAbsScalar(mask[*aj])) { 1027 bb[*ridx] -= *aa*xx[*aj]; 1028 *aa = 0.0; 1029 } 1030 aa++; 1031 aj++; 1032 } 1033 ridx++; 1034 } 1035 } else { /* do not use compressed row format */ 1036 m = l->B->rmap->n; 1037 for (i=0; i<m; i++) { 1038 n = ii[i+1] - ii[i]; 1039 aj = aij->j + ii[i]; 1040 aa = aij->a + ii[i]; 1041 for (j=0; j<n; j++) { 1042 if (PetscAbsScalar(mask[*aj])) { 1043 bb[i] -= *aa*xx[*aj]; 1044 *aa = 0.0; 1045 } 1046 aa++; 1047 aj++; 1048 } 1049 } 1050 } 1051 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1052 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1053 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1054 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1055 ierr = PetscFree(lrows);CHKERRQ(ierr); 1056 1057 /* wait on sends */ 1058 if (nsends) { 1059 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1060 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1061 ierr = PetscFree(send_status);CHKERRQ(ierr); 1062 } 1063 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1064 ierr = PetscFree(svalues);CHKERRQ(ierr); 1065 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatMult_MPIAIJ" 1071 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 PetscErrorCode ierr; 1075 PetscInt nt; 1076 1077 PetscFunctionBegin; 1078 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1079 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1080 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1082 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1083 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1089 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1090 { 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1092 PetscErrorCode ierr; 1093 1094 PetscFunctionBegin; 1095 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1101 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1102 { 1103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1104 PetscErrorCode ierr; 1105 1106 PetscFunctionBegin; 1107 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1108 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1109 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1116 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1117 { 1118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 PetscBool merged; 1121 1122 PetscFunctionBegin; 1123 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1124 /* do nondiagonal part */ 1125 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1126 if (!merged) { 1127 /* send it on its way */ 1128 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1129 /* do local part */ 1130 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1131 /* receive remote parts: note this assumes the values are not actually */ 1132 /* added in yy until the next line, */ 1133 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1134 } else { 1135 /* do local part */ 1136 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1137 /* send it on its way */ 1138 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1139 /* values actually were received in the Begin() but we need to call this nop */ 1140 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1141 } 1142 PetscFunctionReturn(0); 1143 } 1144 1145 EXTERN_C_BEGIN 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1148 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1149 { 1150 MPI_Comm comm; 1151 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1152 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1153 IS Me,Notme; 1154 PetscErrorCode ierr; 1155 PetscInt M,N,first,last,*notme,i; 1156 PetscMPIInt size; 1157 1158 PetscFunctionBegin; 1159 1160 /* Easy test: symmetric diagonal block */ 1161 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1162 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1163 if (!*f) PetscFunctionReturn(0); 1164 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1165 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1166 if (size == 1) PetscFunctionReturn(0); 1167 1168 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1169 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1170 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1171 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1172 for (i=0; i<first; i++) notme[i] = i; 1173 for (i=last; i<M; i++) notme[i-last+first] = i; 1174 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1175 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1176 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1177 Aoff = Aoffs[0]; 1178 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1179 Boff = Boffs[0]; 1180 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1181 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1182 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1183 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1184 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1185 ierr = PetscFree(notme);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 EXTERN_C_END 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1192 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1193 { 1194 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1195 PetscErrorCode ierr; 1196 1197 PetscFunctionBegin; 1198 /* do nondiagonal part */ 1199 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1200 /* send it on its way */ 1201 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1202 /* do local part */ 1203 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1204 /* receive remote parts */ 1205 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 /* 1210 This only works 
correctly for square matrices where the subblock A->A is the 1211 diagonal block 1212 */ 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1215 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1216 { 1217 PetscErrorCode ierr; 1218 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1219 1220 PetscFunctionBegin; 1221 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1222 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1223 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "MatScale_MPIAIJ" 1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1230 { 1231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1236 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatDestroy_MPIAIJ" 1242 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1243 { 1244 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 #if defined(PETSC_USE_LOG) 1249 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1250 #endif 1251 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1252 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1253 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1254 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1255 #if defined (PETSC_USE_CTABLE) 1256 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1257 #else 1258 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1259 #endif 1260 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1262 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1263 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1264 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1265 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1266 1267 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1281 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1282 { 1283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1284 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1285 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1286 PetscErrorCode ierr; 1287 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1288 int fd; 1289 PetscInt 
nz,header[4],*row_lengths,*range=0,rlen,i; 1290 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1291 PetscScalar *column_values; 1292 PetscInt message_count,flowcontrolcount; 1293 1294 PetscFunctionBegin; 1295 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1296 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1297 nz = A->nz + B->nz; 1298 if (!rank) { 1299 header[0] = MAT_FILE_CLASSID; 1300 header[1] = mat->rmap->N; 1301 header[2] = mat->cmap->N; 1302 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1303 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1304 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1305 /* get largest number of rows any processor has */ 1306 rlen = mat->rmap->n; 1307 range = mat->rmap->range; 1308 for (i=1; i<size; i++) { 1309 rlen = PetscMax(rlen,range[i+1] - range[i]); 1310 } 1311 } else { 1312 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1313 rlen = mat->rmap->n; 1314 } 1315 1316 /* load up the local row counts */ 1317 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1318 for (i=0; i<mat->rmap->n; i++) { 1319 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1320 } 1321 1322 /* store the row lengths to the file */ 1323 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1324 if (!rank) { 1325 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1326 for (i=1; i<size; i++) { 1327 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1328 rlen = range[i+1] - range[i]; 1329 ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1330 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1331 } 1332 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1333 } else { 1334 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1335 ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1336 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1337 } 1338 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1339 1340 /* load up the local column indices */ 1341 nzmax = nz; /* )th processor needs space a largest processor needs */ 1342 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1343 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1344 cnt = 0; 1345 for (i=0; i<mat->rmap->n; i++) { 1346 for (j=B->i[i]; j<B->i[i+1]; j++) { 1347 if ( (col = garray[B->j[j]]) > cstart) break; 1348 column_indices[cnt++] = col; 1349 } 1350 for (k=A->i[i]; k<A->i[i+1]; k++) { 1351 column_indices[cnt++] = A->j[k] + cstart; 1352 } 1353 for (; j<B->i[i+1]; j++) { 1354 column_indices[cnt++] = garray[B->j[j]]; 1355 } 1356 } 1357 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1358 1359 /* store the column indices to the file */ 1360 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1361 if (!rank) { 1362 MPI_Status status; 1363 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1364 for (i=1; i<size; i++) { 1365 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1366 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1367 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1368 ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1369 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1370 } 1371 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1372 } else { 1373 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1374 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1375 ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1376 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1377 } 1378 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1379 1380 /* load up the local column values */ 1381 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1382 cnt = 0; 1383 for (i=0; i<mat->rmap->n; i++) { 1384 for (j=B->i[i]; j<B->i[i+1]; j++) { 1385 if ( garray[B->j[j]] > cstart) break; 1386 column_values[cnt++] = B->a[j]; 1387 } 1388 for (k=A->i[i]; k<A->i[i+1]; k++) { 1389 column_values[cnt++] = A->a[k]; 1390 } 1391 for (; j<B->i[i+1]; j++) { 1392 column_values[cnt++] = B->a[j]; 1393 } 1394 } 1395 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1396 1397 /* store the column values to the file */ 1398 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1399 if (!rank) { 1400 MPI_Status status; 1401 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1402 for (i=1; i<size; i++) { 1403 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1404 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1405 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1406 ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1407 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1408 } 1409 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1410 } else { 1411 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1412 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1413 ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1414 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1415 } 1416 ierr = PetscFree(column_values);CHKERRQ(ierr); 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1422 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1423 { 1424 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1425 PetscErrorCode ierr; 1426 PetscMPIInt rank = aij->rank,size = aij->size; 1427 PetscBool isdraw,iascii,isbinary; 1428 PetscViewer sviewer; 1429 PetscViewerFormat format; 1430 1431 PetscFunctionBegin; 1432 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1433 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1434 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1435 if (iascii) { 1436 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1437 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1438 MatInfo info; 1439 PetscBool inodes; 1440 1441 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1442 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1443 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1444 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1445 if (!inodes) { 1446 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1447 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1448 } else { 1449 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1450 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1451 } 1452 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1454 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1455 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1456 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1457 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1458 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1459 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1460 PetscFunctionReturn(0); 1461 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1462 PetscInt inodecount,inodelimit,*inodes; 1463 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1464 if (inodes) { 1465 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1466 } else { 1467 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1468 } 1469 PetscFunctionReturn(0); 1470 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1471 PetscFunctionReturn(0); 1472 } 1473 } else if (isbinary) { 1474 if (size == 1) { 1475 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1476 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1477 } else { 1478 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1479 } 1480 PetscFunctionReturn(0); 1481 } else if (isdraw) { 1482 PetscDraw draw; 1483 PetscBool isnull; 1484 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1485 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1486 } 1487 1488 if (size == 1) { 1489 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1490 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1491 } else { 1492 /* assemble the entire matrix onto first processor. 
*/ 1493 Mat A; 1494 Mat_SeqAIJ *Aloc; 1495 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1496 MatScalar *a; 1497 1498 if (mat->rmap->N > 1024) { 1499 PetscBool flg = PETSC_FALSE; 1500 1501 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1502 if (!flg) { 1503 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1504 } 1505 } 1506 1507 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1508 if (!rank) { 1509 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1510 } else { 1511 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1512 } 1513 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1514 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1515 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1516 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1517 1518 /* copy over the A part */ 1519 Aloc = (Mat_SeqAIJ*)aij->A->data; 1520 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1521 row = mat->rmap->rstart; 1522 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1523 for (i=0; i<m; i++) { 1524 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1525 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1526 } 1527 aj = Aloc->j; 1528 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1529 1530 /* copy over the B part */ 1531 Aloc = (Mat_SeqAIJ*)aij->B->data; 1532 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1533 row = mat->rmap->rstart; 1534 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1535 ct = cols; 1536 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1537 for (i=0; i<m; i++) { 1538 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1539 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1540 } 1541 ierr = PetscFree(ct);CHKERRQ(ierr); 1542 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1543 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1544 /* 1545 Everyone has to call to draw the matrix since the graphics waits are 1546 synchronized across all processors that share the PetscDraw object 1547 */ 1548 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1549 if (!rank) { 1550 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1551 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1552 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1553 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1554 } 1555 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1556 ierr = MatDestroy(&A);CHKERRQ(ierr); 1557 } 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatView_MPIAIJ" 1563 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1564 { 1565 PetscErrorCode ierr; 1566 PetscBool iascii,isdraw,issocket,isbinary; 1567 1568 PetscFunctionBegin; 1569 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1570 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1571 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1572 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1573 if (iascii || isdraw || isbinary || issocket) { 1574 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1575 } else { 1576 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1577 } 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatSOR_MPIAIJ" 1583 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1584 { 1585 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1586 PetscErrorCode ierr; 1587 Vec bb1 = 0; 1588 PetscBool hasop; 1589 1590 PetscFunctionBegin; 1591 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1592 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1593 } 1594 1595 if (flag == SOR_APPLY_UPPER) { 1596 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1601 if (flag & SOR_ZERO_INITIAL_GUESS) { 1602 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1603 its--; 1604 } 1605 1606 while (its--) { 1607 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1608 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 1610 /* update rhs: bb1 = bb - B*x */ 1611 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1612 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1613 1614 /* local sweep */ 1615 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1616 } 1617 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1618 if (flag & SOR_ZERO_INITIAL_GUESS) { 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1620 its--; 1621 } 1622 while (its--) { 1623 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 1626 /* update rhs: bb1 = bb - B*x */ 1627 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1628 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1629 1630 /* local sweep */ 1631 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1632 } 1633 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1634 if (flag & SOR_ZERO_INITIAL_GUESS) { 1635 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1636 its--; 1637 } 1638 while (its--) { 1639 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 1642 /* update rhs: bb1 = bb - B*x */ 1643 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1644 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1645 1646 /* local sweep */ 1647 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1648 } 1649 } else if (flag & SOR_EISENSTAT) { 1650 Vec xx1; 1651 1652 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1653 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1654 1655 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1656 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 if (!mat->diag) { 1658 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1659 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1660 } 1661 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1662 if (hasop) { 1663 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1664 } else { 1665 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1666 } 1667 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1668 1669 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1670 1671 /* local sweep */ 1672 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1673 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1674 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1675 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1676 1677 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatPermute_MPIAIJ" 1683 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1684 { 1685 MPI_Comm comm,pcomm; 1686 PetscInt first,local_size,nrows; 1687 const PetscInt *rows; 1688 PetscMPIInt size; 1689 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1690 PetscErrorCode ierr; 1691 1692 PetscFunctionBegin; 1693 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1694 /* make a collective version of 'rowp' */ 1695 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1696 if (pcomm==comm) { 1697 crowp = rowp; 1698 } else { 1699 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1700 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1701 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1702 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1703 } 1704 /* collect the global row permutation and invert it */ 1705 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1706 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1707 if (pcomm!=comm) { 1708 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1709 } 1710 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1711 /* get the local target indices */ 1712 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1713 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1714 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1716 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1717 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1718 /* the column permutation is so much easier; 1719 make a local version of 'colp' and invert it */ 1720 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1721 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1722 if (size==1) { 1723 lcolp = colp; 1724 } else { 1725 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1726 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1727 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1728 } 1729 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1730 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1731 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1732 if (size>1) { 1733 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1734 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1735 } 1736 /* now we just get the submatrix */ 1737 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1738 /* clean up */ 1739 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1740 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1746 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1747 { 1748 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1749 Mat A = mat->A,B = mat->B; 1750 PetscErrorCode ierr; 1751 PetscReal isend[5],irecv[5]; 1752 1753 PetscFunctionBegin; 1754 info->block_size = 1.0; 1755 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1759 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1760 isend[3] += info->memory; isend[4] += info->mallocs; 1761 if (flag == MAT_LOCAL) { 1762 info->nz_used = isend[0]; 1763 info->nz_allocated = isend[1]; 1764 info->nz_unneeded = isend[2]; 1765 info->memory = isend[3]; 1766 info->mallocs = isend[4]; 1767 } else if (flag == MAT_GLOBAL_MAX) { 1768 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1769 info->nz_used = irecv[0]; 1770 info->nz_allocated = irecv[1]; 1771 info->nz_unneeded = irecv[2]; 1772 info->memory = irecv[3]; 1773 info->mallocs = irecv[4]; 1774 } else if (flag == MAT_GLOBAL_SUM) { 1775 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1776 info->nz_used = irecv[0]; 1777 info->nz_allocated = irecv[1]; 1778 info->nz_unneeded = irecv[2]; 1779 info->memory = irecv[3]; 1780 info->mallocs = irecv[4]; 1781 } 1782 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1783 info->fill_ratio_needed = 0; 1784 info->factor_mallocs = 0; 1785 1786 PetscFunctionReturn(0); 1787 } 1788 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "MatSetOption_MPIAIJ" 1791 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1792 { 1793 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1794 PetscErrorCode ierr; 1795 1796 PetscFunctionBegin; 1797 switch (op) { 1798 case MAT_NEW_NONZERO_LOCATIONS: 1799 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1800 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1801 case MAT_KEEP_NONZERO_PATTERN: 1802 case MAT_NEW_NONZERO_LOCATION_ERR: 1803 case MAT_USE_INODES: 1804 case MAT_IGNORE_ZERO_ENTRIES: 1805 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1806 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1807 break; 1808 case MAT_ROW_ORIENTED: 1809 a->roworiented = flg; 1810 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1811 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1812 break; 1813 case MAT_NEW_DIAGONALS: 1814 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1815 break; 1816 case MAT_IGNORE_OFF_PROC_ENTRIES: 1817 a->donotstash = flg; 1818 break; 1819 case MAT_SPD: 1820 A->spd_set = PETSC_TRUE; 1821 A->spd = flg; 1822 if (flg) { 1823 A->symmetric = PETSC_TRUE; 1824 A->structurally_symmetric = PETSC_TRUE; 1825 A->symmetric_set = PETSC_TRUE; 1826 A->structurally_symmetric_set = PETSC_TRUE; 1827 } 1828 break; 1829 case MAT_SYMMETRIC: 1830 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1831 break; 1832 case MAT_STRUCTURALLY_SYMMETRIC: 
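    /* as with MAT_SYMMETRIC above, the symmetry-type flags are only forwarded to
       the local diagonal block a->A; there is nothing to set on the off-diagonal block */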
1833 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1834 break; 1835 case MAT_HERMITIAN: 1836 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1837 break; 1838 case MAT_SYMMETRY_ETERNAL: 1839 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1840 break; 1841 default: 1842 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1843 } 1844 PetscFunctionReturn(0); 1845 } 1846 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "MatGetRow_MPIAIJ" 1849 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1850 { 1851 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1852 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1853 PetscErrorCode ierr; 1854 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1855 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1856 PetscInt *cmap,*idx_p; 1857 1858 PetscFunctionBegin; 1859 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1860 mat->getrowactive = PETSC_TRUE; 1861 1862 if (!mat->rowvalues && (idx || v)) { 1863 /* 1864 allocate enough space to hold information from the longest row. 1865 */ 1866 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1867 PetscInt max = 1,tmp; 1868 for (i=0; i<matin->rmap->n; i++) { 1869 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1870 if (max < tmp) { max = tmp; } 1871 } 1872 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1873 } 1874 1875 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1876 lrow = row - rstart; 1877 1878 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1879 if (!v) {pvA = 0; pvB = 0;} 1880 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1881 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1882 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1883 nztot = nzA + nzB; 1884 1885 cmap = mat->garray; 1886 if (v || idx) { 1887 if (nztot) { 1888 /* Sort by increasing column numbers, assuming A and B already sorted */ 1889 PetscInt imark = -1; 1890 if (v) { 1891 *v = v_p = mat->rowvalues; 1892 for (i=0; i<nzB; i++) { 1893 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1894 else break; 1895 } 1896 imark = i; 1897 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1898 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1899 } 1900 if (idx) { 1901 *idx = idx_p = mat->rowindices; 1902 if (imark > -1) { 1903 for (i=0; i<imark; i++) { 1904 idx_p[i] = cmap[cworkB[i]]; 1905 } 1906 } else { 1907 for (i=0; i<nzB; i++) { 1908 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1909 else break; 1910 } 1911 imark = i; 1912 } 1913 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1914 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1915 } 1916 } else { 1917 if (idx) *idx = 0; 1918 if (v) *v = 0; 1919 } 1920 } 1921 *nz = nztot; 1922 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1923 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1929 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1930 { 1931 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1932 1933 PetscFunctionBegin; 1934 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1935 
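  /* MatGetRow_MPIAIJ() already restored the rows of the two sequential blocks and
     copied the merged row into the persistent rowvalues/rowindices work arrays, so
     restoring only needs to clear the active flag */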
aij->getrowactive = PETSC_FALSE; 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatNorm_MPIAIJ" 1941 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1942 { 1943 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1944 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1945 PetscErrorCode ierr; 1946 PetscInt i,j,cstart = mat->cmap->rstart; 1947 PetscReal sum = 0.0; 1948 MatScalar *v; 1949 1950 PetscFunctionBegin; 1951 if (aij->size == 1) { 1952 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1953 } else { 1954 if (type == NORM_FROBENIUS) { 1955 v = amat->a; 1956 for (i=0; i<amat->nz; i++) { 1957 #if defined(PETSC_USE_COMPLEX) 1958 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1959 #else 1960 sum += (*v)*(*v); v++; 1961 #endif 1962 } 1963 v = bmat->a; 1964 for (i=0; i<bmat->nz; i++) { 1965 #if defined(PETSC_USE_COMPLEX) 1966 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1967 #else 1968 sum += (*v)*(*v); v++; 1969 #endif 1970 } 1971 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1972 *norm = PetscSqrtReal(*norm); 1973 } else if (type == NORM_1) { /* max column norm */ 1974 PetscReal *tmp,*tmp2; 1975 PetscInt *jj,*garray = aij->garray; 1976 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1977 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1978 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1979 *norm = 0.0; 1980 v = amat->a; jj = amat->j; 1981 for (j=0; j<amat->nz; j++) { 1982 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1983 } 1984 v = bmat->a; jj = bmat->j; 1985 for (j=0; j<bmat->nz; j++) { 1986 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1987 } 1988 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1989 for (j=0; j<mat->cmap->N; j++) { 1990 if (tmp2[j] > *norm) *norm = tmp2[j]; 1991 } 1992 ierr = PetscFree(tmp);CHKERRQ(ierr); 1993 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1994 } else if (type == NORM_INFINITY) { /* max row norm */ 1995 PetscReal ntemp = 0.0; 1996 for (j=0; j<aij->A->rmap->n; j++) { 1997 v = amat->a + amat->i[j]; 1998 sum = 0.0; 1999 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2000 sum += PetscAbsScalar(*v); v++; 2001 } 2002 v = bmat->a + bmat->i[j]; 2003 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2004 sum += PetscAbsScalar(*v); v++; 2005 } 2006 if (sum > ntemp) ntemp = sum; 2007 } 2008 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2009 } else { 2010 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2011 } 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatTranspose_MPIAIJ" 2018 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2019 { 2020 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2021 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2022 PetscErrorCode ierr; 2023 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2024 PetscInt cstart=A->cmap->rstart,ncol; 2025 Mat B; 2026 MatScalar *array; 2027 2028 PetscFunctionBegin; 2029 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2030 2031 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2032 ai = Aloc->i; aj = Aloc->j; 2033 bi = Bloc->i; bj = Bloc->j; 2034 if (reuse == 
MAT_INITIAL_MATRIX || *matout == A) { 2035 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2036 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2037 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2038 for (i=0; i<ai[ma]; i++){ 2039 d_nnz[aj[i]] ++; 2040 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2041 } 2042 2043 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2044 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2045 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2046 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2047 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2048 } else { 2049 B = *matout; 2050 } 2051 2052 /* copy over the A part */ 2053 array = Aloc->a; 2054 row = A->rmap->rstart; 2055 for (i=0; i<ma; i++) { 2056 ncol = ai[i+1]-ai[i]; 2057 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2058 row++; array += ncol; aj += ncol; 2059 } 2060 aj = Aloc->j; 2061 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2062 2063 /* copy over the B part */ 2064 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2065 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2066 array = Bloc->a; 2067 row = A->rmap->rstart; 2068 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2069 cols_tmp = cols; 2070 for (i=0; i<mb; i++) { 2071 ncol = bi[i+1]-bi[i]; 2072 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2073 row++; array += ncol; cols_tmp += ncol; 2074 } 2075 ierr = PetscFree(cols);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2080 *matout = B; 2081 } else { 2082 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2089 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2090 { 2091 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2092 Mat a = aij->A,b = aij->B; 2093 PetscErrorCode ierr; 2094 PetscInt s1,s2,s3; 2095 2096 PetscFunctionBegin; 2097 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2098 if (rr) { 2099 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2100 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2101 /* Overlap communication with computation. 
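         The scatter of rr into aij->lvec is only started here; the left scaling of
         the off-diagonal block and the scaling of the diagonal block run while the
         messages are in flight, and the matching VecScatterEnd() below completes
         the scatter just before the columns of the off-diagonal block are scaled.
         A minimal sketch of the same begin/compute/end idiom (ctx, x and xloc are
         illustrative names):

            ierr = VecScatterBegin(ctx,x,xloc,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
            ... local work that does not need xloc ...
            ierr = VecScatterEnd(ctx,x,xloc,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);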
*/ 2102 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2103 } 2104 if (ll) { 2105 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2106 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2107 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2108 } 2109 /* scale the diagonal block */ 2110 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2111 2112 if (rr) { 2113 /* Do a scatter end and then right scale the off-diagonal block */ 2114 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2116 } 2117 2118 PetscFunctionReturn(0); 2119 } 2120 2121 #undef __FUNCT__ 2122 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2123 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2124 { 2125 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2130 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2131 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2132 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2133 PetscFunctionReturn(0); 2134 } 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2137 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2138 { 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2140 PetscErrorCode ierr; 2141 2142 PetscFunctionBegin; 2143 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "MatEqual_MPIAIJ" 2149 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2150 { 2151 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2152 Mat a,b,c,d; 2153 PetscBool flg; 2154 PetscErrorCode ierr; 2155 2156 PetscFunctionBegin; 2157 a = matA->A; b = matA->B; 2158 c = matB->A; d = matB->B; 2159 2160 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2161 if (flg) { 2162 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2163 } 2164 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatCopy_MPIAIJ" 2170 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2171 { 2172 PetscErrorCode ierr; 2173 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2174 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2175 2176 PetscFunctionBegin; 2177 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2178 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2179 /* because of the column compression in the off-processor part of the matrix a->B, 2180 the number of columns in a->B and b->B may be different, hence we cannot call 2181 the MatCopy() directly on the two parts. 
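       (The off-process block keeps only the global columns that actually carry
        nonzeros, remapped through garray, so two matrices with identical global
        nonzero patterns can still have off-diagonal blocks of different widths.)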
If need be, we can provide a more 2182 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2183 then copying the submatrices */ 2184 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2185 } else { 2186 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2187 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2188 } 2189 PetscFunctionReturn(0); 2190 } 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2194 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2195 { 2196 PetscErrorCode ierr; 2197 2198 PetscFunctionBegin; 2199 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2200 PetscFunctionReturn(0); 2201 } 2202 2203 #undef __FUNCT__ 2204 #define __FUNCT__ "MatAXPY_MPIAIJ" 2205 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt i; 2209 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2210 PetscBLASInt bnz,one=1; 2211 Mat_SeqAIJ *x,*y; 2212 2213 PetscFunctionBegin; 2214 if (str == SAME_NONZERO_PATTERN) { 2215 PetscScalar alpha = a; 2216 x = (Mat_SeqAIJ *)xx->A->data; 2217 y = (Mat_SeqAIJ *)yy->A->data; 2218 bnz = PetscBLASIntCast(x->nz); 2219 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2220 x = (Mat_SeqAIJ *)xx->B->data; 2221 y = (Mat_SeqAIJ *)yy->B->data; 2222 bnz = PetscBLASIntCast(x->nz); 2223 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2224 } else if (str == SUBSET_NONZERO_PATTERN) { 2225 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2226 2227 x = (Mat_SeqAIJ *)xx->B->data; 2228 y = (Mat_SeqAIJ *)yy->B->data; 2229 if (y->xtoy && y->XtoY != xx->B) { 2230 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2231 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2232 } 2233 if (!y->xtoy) { /* get xtoy */ 2234 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2235 y->XtoY = xx->B; 2236 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2237 } 2238 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2239 } else { 2240 Mat B; 2241 PetscInt *nnz_d,*nnz_o; 2242 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2243 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2244 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2245 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2246 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2247 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2248 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2249 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2250 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2251 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2252 ierr = MatHeaderReplace(Y,B); 2253 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2254 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2255 } 2256 PetscFunctionReturn(0); 2257 } 2258 2259 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2260 2261 #undef __FUNCT__ 2262 #define __FUNCT__ "MatConjugate_MPIAIJ" 2263 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2264 { 2265 #if defined(PETSC_USE_COMPLEX) 2266 PetscErrorCode ierr; 2267 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2268 2269 PetscFunctionBegin; 2270 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2271 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2272 #else 2273 PetscFunctionBegin; 2274 #endif 2275 PetscFunctionReturn(0); 
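  /* in real arithmetic conjugation is a no-op, so only the complex branch above
     touches the two sequential blocks; combined with MatTranspose() this yields
     the Hermitian transpose of the parallel matrix */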
2276 } 2277 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatRealPart_MPIAIJ" 2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2281 { 2282 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2283 PetscErrorCode ierr; 2284 2285 PetscFunctionBegin; 2286 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2287 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2300 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #ifdef PETSC_HAVE_PBGL 2305 2306 #include <boost/parallel/mpi/bsp_process_group.hpp> 2307 #include <boost/graph/distributed/ilu_default_graph.hpp> 2308 #include <boost/graph/distributed/ilu_0_block.hpp> 2309 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2310 #include <boost/graph/distributed/petsc/interface.hpp> 2311 #include <boost/multi_array.hpp> 2312 #include <boost/parallel/distributed_property_map->hpp> 2313 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2316 /* 2317 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2318 */ 2319 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2320 { 2321 namespace petsc = boost::distributed::petsc; 2322 2323 namespace graph_dist = boost::graph::distributed; 2324 using boost::graph::distributed::ilu_default::process_group_type; 2325 using boost::graph::ilu_permuted; 2326 2327 PetscBool row_identity, col_identity; 2328 PetscContainer c; 2329 PetscInt m, n, M, N; 2330 PetscErrorCode ierr; 2331 2332 PetscFunctionBegin; 2333 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2334 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2335 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2336 if (!row_identity || !col_identity) { 2337 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2338 } 2339 2340 process_group_type pg; 2341 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2342 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2343 lgraph_type& level_graph = *lgraph_p; 2344 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2345 2346 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2347 ilu_permuted(level_graph); 2348 2349 /* put together the new matrix */ 2350 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2351 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2352 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2353 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2354 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2355 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2356 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2357 2358 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2359 ierr = PetscContainerSetPointer(c, lgraph_p); 2360 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2361 ierr = PetscContainerDestroy(&c); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2367 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat 
B,Mat A, const MatFactorInfo *info) 2368 { 2369 PetscFunctionBegin; 2370 PetscFunctionReturn(0); 2371 } 2372 2373 #undef __FUNCT__ 2374 #define __FUNCT__ "MatSolve_MPIAIJ" 2375 /* 2376 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2377 */ 2378 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2379 { 2380 namespace graph_dist = boost::graph::distributed; 2381 2382 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2383 lgraph_type* lgraph_p; 2384 PetscContainer c; 2385 PetscErrorCode ierr; 2386 2387 PetscFunctionBegin; 2388 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2389 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2390 ierr = VecCopy(b, x);CHKERRQ(ierr); 2391 2392 PetscScalar* array_x; 2393 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2394 PetscInt sx; 2395 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2396 2397 PetscScalar* array_b; 2398 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2399 PetscInt sb; 2400 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2401 2402 lgraph_type& level_graph = *lgraph_p; 2403 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2404 2405 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2406 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2407 ref_x(array_x, boost::extents[num_vertices(graph)]); 2408 2409 typedef boost::iterator_property_map<array_ref_type::iterator, 2410 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2411 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2412 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2413 2414 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2415 2416 PetscFunctionReturn(0); 2417 } 2418 #endif 2419 2420 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2421 PetscInt nzlocal,nsends,nrecvs; 2422 PetscMPIInt *send_rank,*recv_rank; 2423 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2424 PetscScalar *sbuf_a,**rbuf_a; 2425 PetscErrorCode (*Destroy)(Mat); 2426 } Mat_Redundant; 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2430 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2431 { 2432 PetscErrorCode ierr; 2433 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2434 PetscInt i; 2435 2436 PetscFunctionBegin; 2437 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2438 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2439 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2440 for (i=0; i<redund->nrecvs; i++){ 2441 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2442 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2443 } 2444 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2445 ierr = PetscFree(redund);CHKERRQ(ierr); 2446 PetscFunctionReturn(0); 2447 } 2448 2449 #undef __FUNCT__ 2450 #define __FUNCT__ "MatDestroy_MatRedundant" 2451 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2452 { 2453 PetscErrorCode ierr; 2454 PetscContainer container; 2455 Mat_Redundant *redund=PETSC_NULL; 2456 2457 PetscFunctionBegin; 2458 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2459 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2460 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2461 A->ops->destroy = redund->Destroy; 2462 ierr = 
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2463   if (A->ops->destroy) {
2464     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2465   }
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2471 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2472 {
2473   PetscMPIInt    rank,size;
2474   MPI_Comm       comm=((PetscObject)mat)->comm;
2475   PetscErrorCode ierr;
2476   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2477   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2478   PetscInt       *rowrange=mat->rmap->range;
2479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2480   Mat            A=aij->A,B=aij->B,C=*matredundant;
2481   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2482   PetscScalar    *sbuf_a;
2483   PetscInt       nzlocal=a->nz+b->nz;
2484   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2485   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2486   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2487   MatScalar      *aworkA,*aworkB;
2488   PetscScalar    *vals;
2489   PetscMPIInt    tag1,tag2,tag3,imdex;
2490   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2491                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2492   MPI_Status     recv_status,*send_status;
2493   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2494   PetscInt       **rbuf_j=PETSC_NULL;
2495   PetscScalar    **rbuf_a=PETSC_NULL;
2496   Mat_Redundant  *redund=PETSC_NULL;
2497   PetscContainer container;
2498 
2499   PetscFunctionBegin;
2500   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2501   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2502 
2503   if (reuse == MAT_REUSE_MATRIX) {
2504     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2505     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2506     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2507     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2508     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2509     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2510     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2511     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2512 2513 nsends = redund->nsends; 2514 nrecvs = redund->nrecvs; 2515 send_rank = redund->send_rank; 2516 recv_rank = redund->recv_rank; 2517 sbuf_nz = redund->sbuf_nz; 2518 rbuf_nz = redund->rbuf_nz; 2519 sbuf_j = redund->sbuf_j; 2520 sbuf_a = redund->sbuf_a; 2521 rbuf_j = redund->rbuf_j; 2522 rbuf_a = redund->rbuf_a; 2523 } 2524 2525 if (reuse == MAT_INITIAL_MATRIX){ 2526 PetscMPIInt subrank,subsize; 2527 PetscInt nleftover,np_subcomm; 2528 /* get the destination processors' id send_rank, nsends and nrecvs */ 2529 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2530 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2531 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2532 np_subcomm = size/nsubcomm; 2533 nleftover = size - nsubcomm*np_subcomm; 2534 nsends = 0; nrecvs = 0; 2535 for (i=0; i<size; i++){ /* i=rank*/ 2536 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2537 send_rank[nsends] = i; nsends++; 2538 recv_rank[nrecvs++] = i; 2539 } 2540 } 2541 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2542 i = size-nleftover-1; 2543 j = 0; 2544 while (j < nsubcomm - nleftover){ 2545 send_rank[nsends++] = i; 2546 i--; j++; 2547 } 2548 } 2549 2550 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2551 for (i=0; i<nleftover; i++){ 2552 recv_rank[nrecvs++] = size-nleftover+i; 2553 } 2554 } 2555 2556 /* allocate sbuf_j, sbuf_a */ 2557 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2558 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2559 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2560 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2561 2562 /* copy mat's local entries into the buffers */ 2563 if (reuse == MAT_INITIAL_MATRIX){ 2564 rownz_max = 0; 2565 rptr = sbuf_j; 2566 cols = sbuf_j + rend-rstart + 1; 2567 vals = sbuf_a; 2568 rptr[0] = 0; 2569 for (i=0; i<rend-rstart; i++){ 2570 row = i + rstart; 2571 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2572 ncols = nzA + nzB; 2573 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2574 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2575 /* load the column indices for this row into cols */ 2576 lwrite = 0; 2577 for (l=0; l<nzB; l++) { 2578 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2579 vals[lwrite] = aworkB[l]; 2580 cols[lwrite++] = ctmp; 2581 } 2582 } 2583 for (l=0; l<nzA; l++){ 2584 vals[lwrite] = aworkA[l]; 2585 cols[lwrite++] = cstart + cworkA[l]; 2586 } 2587 for (l=0; l<nzB; l++) { 2588 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2589 vals[lwrite] = aworkB[l]; 2590 cols[lwrite++] = ctmp; 2591 } 2592 } 2593 vals += ncols; 2594 cols += ncols; 2595 rptr[i+1] = rptr[i] + ncols; 2596 if (rownz_max < ncols) rownz_max = ncols; 2597 } 2598 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2599 } else { /* only copy matrix values into sbuf_a */ 2600 rptr = sbuf_j; 2601 vals = sbuf_a; 2602 rptr[0] = 0; 2603 for (i=0; i<rend-rstart; i++){ 2604 row = i + rstart; 2605 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2606 ncols = nzA + nzB; 2607 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2608 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2609 lwrite = 0; 2610 for (l=0; l<nzB; l++) { 2611 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2612 } 2613 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2614 for 
(l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2616 } 2617 vals += ncols; 2618 rptr[i+1] = rptr[i] + ncols; 2619 } 2620 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2621 2622 /* send nzlocal to others, and recv other's nzlocal */ 2623 /*--------------------------------------------------*/ 2624 if (reuse == MAT_INITIAL_MATRIX){ 2625 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2626 s_waits2 = s_waits3 + nsends; 2627 s_waits1 = s_waits2 + nsends; 2628 r_waits1 = s_waits1 + nsends; 2629 r_waits2 = r_waits1 + nrecvs; 2630 r_waits3 = r_waits2 + nrecvs; 2631 } else { 2632 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2633 r_waits3 = s_waits3 + nsends; 2634 } 2635 2636 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2637 if (reuse == MAT_INITIAL_MATRIX){ 2638 /* get new tags to keep the communication clean */ 2639 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2641 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2642 2643 /* post receives of other's nzlocal */ 2644 for (i=0; i<nrecvs; i++){ 2645 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2646 } 2647 /* send nzlocal to others */ 2648 for (i=0; i<nsends; i++){ 2649 sbuf_nz[i] = nzlocal; 2650 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2651 } 2652 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2653 count = nrecvs; 2654 while (count) { 2655 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2656 recv_rank[imdex] = recv_status.MPI_SOURCE; 2657 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2658 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2659 2660 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2661 rbuf_nz[imdex] += i + 2; 2662 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2663 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2664 count--; 2665 } 2666 /* wait on sends of nzlocal */ 2667 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2668 /* send mat->i,j to others, and recv from other's */ 2669 /*------------------------------------------------*/ 2670 for (i=0; i<nsends; i++){ 2671 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2672 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2673 } 2674 /* wait on receives of mat->i,j */ 2675 /*------------------------------*/ 2676 count = nrecvs; 2677 while (count) { 2678 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2679 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2680 count--; 2681 } 2682 /* wait on sends of mat->i,j */ 2683 /*---------------------------*/ 2684 if (nsends) { 2685 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2686 } 2687 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2688 2689 /* post receives, send and receive mat->a */ 2690 
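  /* the same nonblocking pattern is used as for the row/column data above: all
     receives are posted before any send and MPI_Waitany()/MPI_Waitall() drain the
     requests, so no arrival order is assumed; a minimal sketch of the idiom with
     illustrative buffer and rank names:
        for (i=0; i<nrecvs; i++) {ierr = MPI_Irecv(rbuf[i],rlen[i],MPIU_SCALAR,src[i],tag,comm,&rwait[i]);CHKERRQ(ierr);}
        for (i=0; i<nsends; i++) {ierr = MPI_Isend(sbuf,slen,MPIU_SCALAR,dst[i],tag,comm,&swait[i]);CHKERRQ(ierr);}
        ierr = MPI_Waitall(nrecvs,rwait,rstatus);CHKERRQ(ierr);
        ierr = MPI_Waitall(nsends,swait,sstatus);CHKERRQ(ierr);
  */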
/*----------------------------------------*/ 2691 for (imdex=0; imdex<nrecvs; imdex++) { 2692 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2693 } 2694 for (i=0; i<nsends; i++){ 2695 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2696 } 2697 count = nrecvs; 2698 while (count) { 2699 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2700 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2701 count--; 2702 } 2703 if (nsends) { 2704 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2705 } 2706 2707 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2708 2709 /* create redundant matrix */ 2710 /*-------------------------*/ 2711 if (reuse == MAT_INITIAL_MATRIX){ 2712 /* compute rownz_max for preallocation */ 2713 for (imdex=0; imdex<nrecvs; imdex++){ 2714 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2715 rptr = rbuf_j[imdex]; 2716 for (i=0; i<j; i++){ 2717 ncols = rptr[i+1] - rptr[i]; 2718 if (rownz_max < ncols) rownz_max = ncols; 2719 } 2720 } 2721 2722 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2723 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2724 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2725 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2726 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2727 } else { 2728 C = *matredundant; 2729 } 2730 2731 /* insert local matrix entries */ 2732 rptr = sbuf_j; 2733 cols = sbuf_j + rend-rstart + 1; 2734 vals = sbuf_a; 2735 for (i=0; i<rend-rstart; i++){ 2736 row = i + rstart; 2737 ncols = rptr[i+1] - rptr[i]; 2738 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2739 vals += ncols; 2740 cols += ncols; 2741 } 2742 /* insert received matrix entries */ 2743 for (imdex=0; imdex<nrecvs; imdex++){ 2744 rstart = rowrange[recv_rank[imdex]]; 2745 rend = rowrange[recv_rank[imdex]+1]; 2746 rptr = rbuf_j[imdex]; 2747 cols = rbuf_j[imdex] + rend-rstart + 1; 2748 vals = rbuf_a[imdex]; 2749 for (i=0; i<rend-rstart; i++){ 2750 row = i + rstart; 2751 ncols = rptr[i+1] - rptr[i]; 2752 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2753 vals += ncols; 2754 cols += ncols; 2755 } 2756 } 2757 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2758 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2760 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2761 if (reuse == MAT_INITIAL_MATRIX) { 2762 PetscContainer container; 2763 *matredundant = C; 2764 /* create a supporting struct and attach it to C for reuse */ 2765 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2766 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2767 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2768 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2769 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2770 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2771 2772 redund->nzlocal = nzlocal; 2773 redund->nsends = nsends; 2774 redund->nrecvs = nrecvs; 2775 
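    /* stash the communication layout and buffers in the container attached to C so
       that a later call with MAT_REUSE_MATRIX only has to refill sbuf_a and resend
       the numerical values */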
redund->send_rank = send_rank; 2776 redund->recv_rank = recv_rank; 2777 redund->sbuf_nz = sbuf_nz; 2778 redund->rbuf_nz = rbuf_nz; 2779 redund->sbuf_j = sbuf_j; 2780 redund->sbuf_a = sbuf_a; 2781 redund->rbuf_j = rbuf_j; 2782 redund->rbuf_a = rbuf_a; 2783 2784 redund->Destroy = C->ops->destroy; 2785 C->ops->destroy = MatDestroy_MatRedundant; 2786 } 2787 PetscFunctionReturn(0); 2788 } 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2792 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2793 { 2794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2795 PetscErrorCode ierr; 2796 PetscInt i,*idxb = 0; 2797 PetscScalar *va,*vb; 2798 Vec vtmp; 2799 2800 PetscFunctionBegin; 2801 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2802 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2803 if (idx) { 2804 for (i=0; i<A->rmap->n; i++) { 2805 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2806 } 2807 } 2808 2809 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2810 if (idx) { 2811 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2812 } 2813 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2814 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2815 2816 for (i=0; i<A->rmap->n; i++){ 2817 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2818 va[i] = vb[i]; 2819 if (idx) idx[i] = a->garray[idxb[i]]; 2820 } 2821 } 2822 2823 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2824 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2825 ierr = PetscFree(idxb);CHKERRQ(ierr); 2826 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2827 PetscFunctionReturn(0); 2828 } 2829 2830 #undef __FUNCT__ 2831 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2832 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2833 { 2834 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2835 PetscErrorCode ierr; 2836 PetscInt i,*idxb = 0; 2837 PetscScalar *va,*vb; 2838 Vec vtmp; 2839 2840 PetscFunctionBegin; 2841 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2842 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2843 if (idx) { 2844 for (i=0; i<A->cmap->n; i++) { 2845 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2846 } 2847 } 2848 2849 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2850 if (idx) { 2851 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2852 } 2853 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2854 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2855 2856 for (i=0; i<A->rmap->n; i++){ 2857 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2858 va[i] = vb[i]; 2859 if (idx) idx[i] = a->garray[idxb[i]]; 2860 } 2861 } 2862 2863 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2864 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2865 ierr = PetscFree(idxb);CHKERRQ(ierr); 2866 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2867 PetscFunctionReturn(0); 2868 } 2869 2870 #undef __FUNCT__ 2871 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2872 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2873 { 2874 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2875 PetscInt n = A->rmap->n; 2876 PetscInt cstart = A->cmap->rstart; 2877 PetscInt *cmap = mat->garray; 2878 PetscInt *diagIdx, *offdiagIdx; 2879 Vec diagV, offdiagV; 2880 PetscScalar *a, *diagA, *offdiagA; 2881 PetscInt r; 2882 PetscErrorCode ierr; 2883 2884 PetscFunctionBegin; 2885 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2886 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2887 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2888 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2889 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2890 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2891 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2892 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2893 for(r = 0; r < n; ++r) { 2894 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2895 a[r] = diagA[r]; 2896 idx[r] = cstart + diagIdx[r]; 2897 } else { 2898 a[r] = offdiagA[r]; 2899 idx[r] = cmap[offdiagIdx[r]]; 2900 } 2901 } 2902 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2903 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2904 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2905 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2906 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2907 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2908 PetscFunctionReturn(0); 2909 } 2910 2911 #undef __FUNCT__ 2912 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2913 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2914 { 2915 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2916 PetscInt n = A->rmap->n; 2917 PetscInt cstart = A->cmap->rstart; 2918 PetscInt *cmap = mat->garray; 2919 PetscInt *diagIdx, *offdiagIdx; 2920 Vec diagV, offdiagV; 2921 PetscScalar *a, *diagA, *offdiagA; 2922 PetscInt r; 2923 PetscErrorCode ierr; 2924 2925 PetscFunctionBegin; 2926 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2927 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2928 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2929 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2930 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2931 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2932 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2933 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2934 for(r = 0; r < n; ++r) { 2935 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2936 a[r] = diagA[r]; 2937 idx[r] = cstart + diagIdx[r]; 2938 } else { 2939 a[r] = offdiagA[r]; 2940 idx[r] = cmap[offdiagIdx[r]]; 2941 } 2942 } 2943 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2944 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2945 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2946 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2947 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2948 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2949 PetscFunctionReturn(0); 2950 } 2951 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2954 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2955 { 2956 PetscErrorCode ierr; 2957 Mat *dummy; 2958 2959 PetscFunctionBegin; 2960 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2961 *newmat = *dummy; 2962 ierr = PetscFree(dummy);CHKERRQ(ierr); 2963 PetscFunctionReturn(0); 2964 } 2965 2966 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2967 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2970 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2971 { 2972 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2977 PetscFunctionReturn(0); 2978 } 2979 2980 2981 /* -------------------------------------------------------------------*/ 2982 static 
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2983 MatGetRow_MPIAIJ, 2984 MatRestoreRow_MPIAIJ, 2985 MatMult_MPIAIJ, 2986 /* 4*/ MatMultAdd_MPIAIJ, 2987 MatMultTranspose_MPIAIJ, 2988 MatMultTransposeAdd_MPIAIJ, 2989 #ifdef PETSC_HAVE_PBGL 2990 MatSolve_MPIAIJ, 2991 #else 2992 0, 2993 #endif 2994 0, 2995 0, 2996 /*10*/ 0, 2997 0, 2998 0, 2999 MatSOR_MPIAIJ, 3000 MatTranspose_MPIAIJ, 3001 /*15*/ MatGetInfo_MPIAIJ, 3002 MatEqual_MPIAIJ, 3003 MatGetDiagonal_MPIAIJ, 3004 MatDiagonalScale_MPIAIJ, 3005 MatNorm_MPIAIJ, 3006 /*20*/ MatAssemblyBegin_MPIAIJ, 3007 MatAssemblyEnd_MPIAIJ, 3008 MatSetOption_MPIAIJ, 3009 MatZeroEntries_MPIAIJ, 3010 /*24*/ MatZeroRows_MPIAIJ, 3011 0, 3012 #ifdef PETSC_HAVE_PBGL 3013 0, 3014 #else 3015 0, 3016 #endif 3017 0, 3018 0, 3019 /*29*/ MatSetUpPreallocation_MPIAIJ, 3020 #ifdef PETSC_HAVE_PBGL 3021 0, 3022 #else 3023 0, 3024 #endif 3025 0, 3026 0, 3027 0, 3028 /*34*/ MatDuplicate_MPIAIJ, 3029 0, 3030 0, 3031 0, 3032 0, 3033 /*39*/ MatAXPY_MPIAIJ, 3034 MatGetSubMatrices_MPIAIJ, 3035 MatIncreaseOverlap_MPIAIJ, 3036 MatGetValues_MPIAIJ, 3037 MatCopy_MPIAIJ, 3038 /*44*/ MatGetRowMax_MPIAIJ, 3039 MatScale_MPIAIJ, 3040 0, 3041 0, 3042 MatZeroRowsColumns_MPIAIJ, 3043 /*49*/ MatSetBlockSize_MPIAIJ, 3044 0, 3045 0, 3046 0, 3047 0, 3048 /*54*/ MatFDColoringCreate_MPIAIJ, 3049 0, 3050 MatSetUnfactored_MPIAIJ, 3051 MatPermute_MPIAIJ, 3052 0, 3053 /*59*/ MatGetSubMatrix_MPIAIJ, 3054 MatDestroy_MPIAIJ, 3055 MatView_MPIAIJ, 3056 0, 3057 0, 3058 /*64*/ 0, 3059 0, 3060 0, 3061 0, 3062 0, 3063 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3064 MatGetRowMinAbs_MPIAIJ, 3065 0, 3066 MatSetColoring_MPIAIJ, 3067 #if defined(PETSC_HAVE_ADIC) 3068 MatSetValuesAdic_MPIAIJ, 3069 #else 3070 0, 3071 #endif 3072 MatSetValuesAdifor_MPIAIJ, 3073 /*75*/ MatFDColoringApply_AIJ, 3074 0, 3075 0, 3076 0, 3077 0, 3078 /*80*/ 0, 3079 0, 3080 0, 3081 /*83*/ MatLoad_MPIAIJ, 3082 0, 3083 0, 3084 0, 3085 0, 3086 0, 3087 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3088 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3089 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3090 MatPtAP_Basic, 3091 MatPtAPSymbolic_MPIAIJ, 3092 /*94*/ MatPtAPNumeric_MPIAIJ, 3093 0, 3094 0, 3095 0, 3096 0, 3097 /*99*/ 0, 3098 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3099 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3100 MatConjugate_MPIAIJ, 3101 0, 3102 /*104*/MatSetValuesRow_MPIAIJ, 3103 MatRealPart_MPIAIJ, 3104 MatImaginaryPart_MPIAIJ, 3105 0, 3106 0, 3107 /*109*/0, 3108 MatGetRedundantMatrix_MPIAIJ, 3109 MatGetRowMin_MPIAIJ, 3110 0, 3111 0, 3112 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3113 0, 3114 0, 3115 0, 3116 0, 3117 /*119*/0, 3118 0, 3119 0, 3120 0, 3121 MatGetMultiProcBlock_MPIAIJ, 3122 /*124*/MatFindNonZeroRows_MPIAIJ, 3123 MatGetColumnNorms_MPIAIJ, 3124 MatInvertBlockDiagonal_MPIAIJ, 3125 0, 3126 MatGetSubMatricesParallel_MPIAIJ, 3127 /*129*/0 3128 }; 3129 3130 /* ----------------------------------------------------------------------------------------*/ 3131 3132 EXTERN_C_BEGIN 3133 #undef __FUNCT__ 3134 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3135 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3136 { 3137 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3138 PetscErrorCode ierr; 3139 3140 PetscFunctionBegin; 3141 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3142 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3143 PetscFunctionReturn(0); 3144 } 3145 EXTERN_C_END 3146 3147 EXTERN_C_BEGIN 3148 #undef __FUNCT__ 3149 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3150 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3151 { 3152 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3153 PetscErrorCode ierr; 3154 3155 
PetscFunctionBegin; 3156 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3157 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3158 PetscFunctionReturn(0); 3159 } 3160 EXTERN_C_END 3161 3162 EXTERN_C_BEGIN 3163 #undef __FUNCT__ 3164 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3165 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3166 { 3167 Mat_MPIAIJ *b; 3168 PetscErrorCode ierr; 3169 PetscInt i; 3170 3171 PetscFunctionBegin; 3172 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3173 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3174 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3175 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3176 3177 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3178 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3179 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3180 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3181 if (d_nnz) { 3182 for (i=0; i<B->rmap->n; i++) { 3183 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3184 } 3185 } 3186 if (o_nnz) { 3187 for (i=0; i<B->rmap->n; i++) { 3188 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3189 } 3190 } 3191 b = (Mat_MPIAIJ*)B->data; 3192 3193 if (!B->preallocated) { 3194 /* Explicitly create 2 MATSEQAIJ matrices. */ 3195 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3196 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3197 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3198 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3199 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3200 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3201 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3202 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3203 } 3204 3205 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3206 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3207 B->preallocated = PETSC_TRUE; 3208 PetscFunctionReturn(0); 3209 } 3210 EXTERN_C_END 3211 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3214 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3215 { 3216 Mat mat; 3217 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3218 PetscErrorCode ierr; 3219 3220 PetscFunctionBegin; 3221 *newmat = 0; 3222 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3223 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3224 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3225 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3226 a = (Mat_MPIAIJ*)mat->data; 3227 3228 mat->factortype = matin->factortype; 3229 mat->rmap->bs = matin->rmap->bs; 3230 mat->assembled = PETSC_TRUE; 3231 mat->insertmode = NOT_SET_VALUES; 3232 mat->preallocated = PETSC_TRUE; 3233 3234 a->size = oldmat->size; 3235 a->rank = oldmat->rank; 3236 a->donotstash = oldmat->donotstash; 3237 a->roworiented = oldmat->roworiented; 3238 a->rowindices = 0; 3239 a->rowvalues = 0; 3240 a->getrowactive = PETSC_FALSE; 3241 3242 ierr = 
PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3243 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3244 3245 if (oldmat->colmap) { 3246 #if defined (PETSC_USE_CTABLE) 3247 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3248 #else 3249 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3250 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3251 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3252 #endif 3253 } else a->colmap = 0; 3254 if (oldmat->garray) { 3255 PetscInt len; 3256 len = oldmat->B->cmap->n; 3257 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3258 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3259 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3260 } else a->garray = 0; 3261 3262 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3263 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3264 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3265 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3266 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3267 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3268 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3269 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3270 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3271 *newmat = mat; 3272 PetscFunctionReturn(0); 3273 } 3274 3275 3276 3277 #undef __FUNCT__ 3278 #define __FUNCT__ "MatLoad_MPIAIJ" 3279 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3280 { 3281 PetscScalar *vals,*svals; 3282 MPI_Comm comm = ((PetscObject)viewer)->comm; 3283 PetscErrorCode ierr; 3284 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3285 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3286 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3287 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3288 PetscInt cend,cstart,n,*rowners,sizesset=1; 3289 int fd; 3290 3291 PetscFunctionBegin; 3292 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3293 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3294 if (!rank) { 3295 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3296 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3297 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3298 } 3299 3300 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3301 3302 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3303 M = header[1]; N = header[2]; 3304 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3305 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3306 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3307 3308 /* If global sizes are set, check if they are consistent with that given in the file */ 3309 if (sizesset) { 3310 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3311 } 3312 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3313 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of 
cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3314 3315 /* determine ownership of all rows */ 3316 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3317 else m = newMat->rmap->n; /* Set by user */ 3318 3319 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3320 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3321 3322 /* First process needs enough room for process with most rows */ 3323 if (!rank) { 3324 mmax = rowners[1]; 3325 for (i=2; i<size; i++) { 3326 mmax = PetscMax(mmax,rowners[i]); 3327 } 3328 } else mmax = m; 3329 3330 rowners[0] = 0; 3331 for (i=2; i<=size; i++) { 3332 rowners[i] += rowners[i-1]; 3333 } 3334 rstart = rowners[rank]; 3335 rend = rowners[rank+1]; 3336 3337 /* distribute row lengths to all processors */ 3338 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3339 if (!rank) { 3340 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3341 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3342 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3343 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3344 for (j=0; j<m; j++) { 3345 procsnz[0] += ourlens[j]; 3346 } 3347 for (i=1; i<size; i++) { 3348 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3349 /* calculate the number of nonzeros on each processor */ 3350 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3351 procsnz[i] += rowlengths[j]; 3352 } 3353 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3354 } 3355 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3356 } else { 3357 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3358 } 3359 3360 if (!rank) { 3361 /* determine max buffer needed and allocate it */ 3362 maxnz = 0; 3363 for (i=0; i<size; i++) { 3364 maxnz = PetscMax(maxnz,procsnz[i]); 3365 } 3366 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3367 3368 /* read in my part of the matrix column indices */ 3369 nz = procsnz[0]; 3370 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3371 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3372 3373 /* read in every one elses and ship off */ 3374 for (i=1; i<size; i++) { 3375 nz = procsnz[i]; 3376 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3377 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3378 } 3379 ierr = PetscFree(cols);CHKERRQ(ierr); 3380 } else { 3381 /* determine buffer space needed for message */ 3382 nz = 0; 3383 for (i=0; i<m; i++) { 3384 nz += ourlens[i]; 3385 } 3386 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3387 3388 /* receive message of column indices*/ 3389 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3390 } 3391 3392 /* determine column ownership if matrix is not square */ 3393 if (N != M) { 3394 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3395 else n = newMat->cmap->n; 3396 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3397 cstart = cend - n; 3398 } else { 3399 cstart = rstart; 3400 cend = rend; 3401 n = cend - cstart; 3402 } 3403 3404 /* loop over local rows, determining number of off diagonal entries */ 3405 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3406 jj = 0; 3407 for (i=0; i<m; i++) { 3408 for (j=0; j<ourlens[i]; j++) { 3409 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3410 jj++; 3411 } 
3412 } 3413 3414 for (i=0; i<m; i++) { 3415 ourlens[i] -= offlens[i]; 3416 } 3417 if (!sizesset) { 3418 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3419 } 3420 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3421 3422 for (i=0; i<m; i++) { 3423 ourlens[i] += offlens[i]; 3424 } 3425 3426 if (!rank) { 3427 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3428 3429 /* read in my part of the matrix numerical values */ 3430 nz = procsnz[0]; 3431 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3432 3433 /* insert into matrix */ 3434 jj = rstart; 3435 smycols = mycols; 3436 svals = vals; 3437 for (i=0; i<m; i++) { 3438 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3439 smycols += ourlens[i]; 3440 svals += ourlens[i]; 3441 jj++; 3442 } 3443 3444 /* read in other processors and ship out */ 3445 for (i=1; i<size; i++) { 3446 nz = procsnz[i]; 3447 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3448 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3449 } 3450 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3451 } else { 3452 /* receive numeric values */ 3453 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3454 3455 /* receive message of values*/ 3456 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3457 3458 /* insert into matrix */ 3459 jj = rstart; 3460 smycols = mycols; 3461 svals = vals; 3462 for (i=0; i<m; i++) { 3463 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3464 smycols += ourlens[i]; 3465 svals += ourlens[i]; 3466 jj++; 3467 } 3468 } 3469 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3470 ierr = PetscFree(vals);CHKERRQ(ierr); 3471 ierr = PetscFree(mycols);CHKERRQ(ierr); 3472 ierr = PetscFree(rowners);CHKERRQ(ierr); 3473 3474 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3475 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3476 PetscFunctionReturn(0); 3477 } 3478 3479 #undef __FUNCT__ 3480 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3481 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3482 { 3483 PetscErrorCode ierr; 3484 IS iscol_local; 3485 PetscInt csize; 3486 3487 PetscFunctionBegin; 3488 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3489 if (call == MAT_REUSE_MATRIX) { 3490 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3491 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3492 } else { 3493 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3494 } 3495 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3496 if (call == MAT_INITIAL_MATRIX) { 3497 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3498 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3499 } 3500 PetscFunctionReturn(0); 3501 } 3502 3503 #undef __FUNCT__ 3504 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3505 /* 3506 Not great since it makes two copies of the submatrix, first an SeqAIJ 3507 in local and then by concatenating the local matrices the end result. 3508 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3509 3510 Note: This requires a sequential iscol with all indices. 
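        (MatGetSubMatrix_MPIAIJ() above satisfies this by calling ISAllGather() on the parallel iscol
        before dispatching to this routine.)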
3511 */ 3512 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3513 { 3514 PetscErrorCode ierr; 3515 PetscMPIInt rank,size; 3516 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3517 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3518 Mat *local,M,Mreuse; 3519 MatScalar *vwork,*aa; 3520 MPI_Comm comm = ((PetscObject)mat)->comm; 3521 Mat_SeqAIJ *aij; 3522 3523 3524 PetscFunctionBegin; 3525 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3527 3528 if (call == MAT_REUSE_MATRIX) { 3529 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3530 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3531 local = &Mreuse; 3532 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3533 } else { 3534 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3535 Mreuse = *local; 3536 ierr = PetscFree(local);CHKERRQ(ierr); 3537 } 3538 3539 /* 3540 m - number of local rows 3541 n - number of columns (same on all processors) 3542 rstart - first row in new global matrix generated 3543 */ 3544 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3545 if (call == MAT_INITIAL_MATRIX) { 3546 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3547 ii = aij->i; 3548 jj = aij->j; 3549 3550 /* 3551 Determine the number of non-zeros in the diagonal and off-diagonal 3552 portions of the matrix in order to do correct preallocation 3553 */ 3554 3555 /* first get start and end of "diagonal" columns */ 3556 if (csize == PETSC_DECIDE) { 3557 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3558 if (mglobal == n) { /* square matrix */ 3559 nlocal = m; 3560 } else { 3561 nlocal = n/size + ((n % size) > rank); 3562 } 3563 } else { 3564 nlocal = csize; 3565 } 3566 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3567 rstart = rend - nlocal; 3568 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3569 3570 /* next, compute all the lengths */ 3571 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3572 olens = dlens + m; 3573 for (i=0; i<m; i++) { 3574 jend = ii[i+1] - ii[i]; 3575 olen = 0; 3576 dlen = 0; 3577 for (j=0; j<jend; j++) { 3578 if (*jj < rstart || *jj >= rend) olen++; 3579 else dlen++; 3580 jj++; 3581 } 3582 olens[i] = olen; 3583 dlens[i] = dlen; 3584 } 3585 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3586 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3587 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3588 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3589 ierr = PetscFree(dlens);CHKERRQ(ierr); 3590 } else { 3591 PetscInt ml,nl; 3592 3593 M = *newmat; 3594 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3595 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3596 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3597 /* 3598 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3599 rather than the slower MatSetValues(). 
3600 */
3601 M->was_assembled = PETSC_TRUE;
3602 M->assembled = PETSC_FALSE;
3603 }
3604 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3605 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3606 ii = aij->i;
3607 jj = aij->j;
3608 aa = aij->a;
3609 for (i=0; i<m; i++) {
3610 row = rstart + i;
3611 nz = ii[i+1] - ii[i];
3612 cwork = jj; jj += nz;
3613 vwork = aa; aa += nz;
3614 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3615 }
3616
3617 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619 *newmat = M;
3620
3621 /* save submatrix used in processor for next request */
3622 if (call == MAT_INITIAL_MATRIX) {
3623 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3624 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3625 }
3626
3627 PetscFunctionReturn(0);
3628 }
3629
3630 EXTERN_C_BEGIN
3631 #undef __FUNCT__
3632 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3633 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3634 {
3635 PetscInt m,cstart, cend,j,nnz,i,d;
3636 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3637 const PetscInt *JJ;
3638 PetscScalar *values;
3639 PetscErrorCode ierr;
3640
3641 PetscFunctionBegin;
3642 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3643
3644 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3645 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3646 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3647 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3648 m = B->rmap->n;
3649 cstart = B->cmap->rstart;
3650 cend = B->cmap->rend;
3651 rstart = B->rmap->rstart;
3652
3653 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3654
3655 #if defined(PETSC_USE_DEBUG)
3656 for (i=0; i<m; i++) {
3657 nnz = Ii[i+1]- Ii[i];
3658 JJ = J + Ii[i];
3659 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3660 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3661 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3662 }
3663 #endif
3664
3665 for (i=0; i<m; i++) {
3666 nnz = Ii[i+1]- Ii[i];
3667 JJ = J + Ii[i];
3668 nnz_max = PetscMax(nnz_max,nnz);
3669 d = 0;
3670 for (j=0; j<nnz; j++) {
3671 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3672 }
3673 d_nnz[i] = d;
3674 o_nnz[i] = nnz - d;
3675 }
3676 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3677 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3678
3679 if (v) values = (PetscScalar*)v;
3680 else {
3681 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3682 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3683 }
3684
3685 for (i=0; i<m; i++) {
3686 ii = i + rstart;
3687 nnz = Ii[i+1]- Ii[i];
3688 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3689 }
3690 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692
3693 if (!v) {
3694 ierr = PetscFree(values);CHKERRQ(ierr);
3695 }
3696 PetscFunctionReturn(0);
3697 }
3698 EXTERN_C_END
3699
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3702 /*@
3703 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3704 (the default parallel PETSc format).
3705
3706 Collective on MPI_Comm
3707
3708 Input Parameters:
3709 + B - the matrix
3710 . i - the indices into j for the start of each local row (starts with zero)
3711 . j - the column indices for each local row (starts with zero)
3712 - v - optional values in the matrix
3713
3714 Level: developer
3715
3716 Notes:
3717 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3718 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3719 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3720
3721 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3722
3723 The format used for the sparse matrix input is equivalent to a
3724 row-major ordering, i.e. for the following matrix, the input data expected is
3725 as shown:
3726
3727 1 0 0
3728 2 0 3 P0
3729 -------
3730 4 5 6 P1
3731
3732 Process0 [P0]: rows_owned=[0,1]
3733 i = {0,1,3} [size = nrow+1 = 2+1]
3734 j = {0,0,2} [size = nz = 3]
3735 v = {1,2,3} [size = nz = 3]
3736
3737 Process1 [P1]: rows_owned=[2]
3738 i = {0,3} [size = nrow+1 = 1+1]
3739 j = {0,1,2} [size = nz = 3]
3740 v = {4,5,6} [size = nz = 3]
3741
3742 .keywords: matrix, aij, compressed row, sparse, parallel
3743
3744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3745 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3746 @*/
3747 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3748 {
3749 PetscErrorCode ierr;
3750
3751 PetscFunctionBegin;
3752 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3753 PetscFunctionReturn(0);
3754 }
3755
3756 #undef __FUNCT__
3757 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3758 /*@C
3759 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3760 (the default parallel PETSc format). For good matrix assembly performance
3761 the user should preallocate the matrix storage by setting the parameters
3762 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3763 performance can be increased by more than a factor of 50.
3764
3765 Collective on MPI_Comm
3766
3767 Input Parameters:
3768 + A - the matrix
3769 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3770 (same value is used for all local rows)
3771 . d_nnz - array containing the number of nonzeros in the various rows of the
3772 DIAGONAL portion of the local submatrix (possibly different for each row)
3773 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3774 The size of this array is equal to the number of local rows, i.e. 'm'.
3775 You must leave room for the diagonal entry even if it is zero.
3776 .
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3777 submatrix (same value is used for all local rows). 3778 - o_nnz - array containing the number of nonzeros in the various rows of the 3779 OFF-DIAGONAL portion of the local submatrix (possibly different for 3780 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3781 structure. The size of this array is equal to the number 3782 of local rows, i.e 'm'. 3783 3784 If the *_nnz parameter is given then the *_nz parameter is ignored 3785 3786 The AIJ format (also called the Yale sparse matrix format or 3787 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3788 storage. The stored row and column indices begin with zero. 3789 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3790 3791 The parallel matrix is partitioned such that the first m0 rows belong to 3792 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3793 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3794 3795 The DIAGONAL portion of the local submatrix of a processor can be defined 3796 as the submatrix which is obtained by extraction the part corresponding to 3797 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3798 first row that belongs to the processor, r2 is the last row belonging to 3799 the this processor, and c1-c2 is range of indices of the local part of a 3800 vector suitable for applying the matrix to. This is an mxn matrix. In the 3801 common case of a square matrix, the row and column ranges are the same and 3802 the DIAGONAL part is also square. The remaining portion of the local 3803 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3804 3805 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3806 3807 You can call MatGetInfo() to get information on how effective the preallocation was; 3808 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3809 You can also run with the option -info and look for messages with the string 3810 malloc in them to see if additional memory allocation was needed. 3811 3812 Example usage: 3813 3814 Consider the following 8x8 matrix with 34 non-zero values, that is 3815 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3816 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3817 as follows: 3818 3819 .vb 3820 1 2 0 | 0 3 0 | 0 4 3821 Proc0 0 5 6 | 7 0 0 | 8 0 3822 9 0 10 | 11 0 0 | 12 0 3823 ------------------------------------- 3824 13 0 14 | 15 16 17 | 0 0 3825 Proc1 0 18 0 | 19 20 21 | 0 0 3826 0 0 0 | 22 23 0 | 24 0 3827 ------------------------------------- 3828 Proc2 25 26 27 | 0 0 28 | 29 0 3829 30 0 0 | 31 32 33 | 0 34 3830 .ve 3831 3832 This can be represented as a collection of submatrices as: 3833 3834 .vb 3835 A B C 3836 D E F 3837 G H I 3838 .ve 3839 3840 Where the submatrices A,B,C are owned by proc0, D,E,F are 3841 owned by proc1, G,H,I are owned by proc2. 3842 3843 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3844 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3845 The 'M','N' parameters are 8,8, and have the same values on all procs. 3846 3847 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3848 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3849 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
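   In terms of the calling sequence, a minimal sketch (an illustration only, not an
   excerpt from an actual PETSc example; the nnz values shown are the proc0 per-row
   counts worked out below) of the recommended MatCreate()/MatSetType()/
   MatMPIAIJSetPreallocation() sequence for this 8x8 example is:

.vb
      Mat            A;
      PetscInt       d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* proc0 per-row counts, see below */
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);           /* m,n = 3,3 on proc0; M,N = 8,8 */
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
   Each process makes the analogous call with its own local sizes and nnz arrays.
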
3850 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3851 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3852 matrix, and [DF] as another SeqAIJ matrix.
3853
3854 When the d_nz, o_nz parameters are specified, d_nz storage elements are
3855 allocated for every row of the local DIAGONAL submatrix, and o_nz
3856 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3857 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3858 the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3859 In this case, the values of d_nz,o_nz are:
3860 .vb
3861 proc0 : d_nz = 2, o_nz = 2
3862 proc1 : d_nz = 3, o_nz = 2
3863 proc2 : d_nz = 1, o_nz = 4
3864 .ve
3865 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3866 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3867 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3868 34 values.
3869
3870 When the d_nnz, o_nnz parameters are specified, the storage is specified
3871 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3872 In the above case the values for d_nnz,o_nnz are:
3873 .vb
3874 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3875 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3876 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3877 .ve
3878 Here the space allocated is the sum of all the above values, i.e. 34, and
3879 hence the preallocation is perfect.
3880
3881 Level: intermediate
3882
3883 .keywords: matrix, aij, compressed row, sparse, parallel
3884
3885 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3886 MPIAIJ, MatGetInfo()
3887 @*/
3888 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3889 {
3890 PetscErrorCode ierr;
3891
3892 PetscFunctionBegin;
3893 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3894 PetscFunctionReturn(0);
3895 }
3896
3897 #undef __FUNCT__
3898 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3899 /*@
3900 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3901 in standard CSR format.
3902
3903 Collective on MPI_Comm
3904
3905 Input Parameters:
3906 + comm - MPI communicator
3907 . m - number of local rows (Cannot be PETSC_DECIDE)
3908 . n - This value should be the same as the local size used in creating the
3909 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3910 calculated if N is given) For square matrices n is almost always m.
3911 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3912 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3913 . i - row indices
3914 . j - column indices
3915 - a - matrix values
3916
3917 Output Parameter:
3918 . mat - the matrix
3919
3920 Level: intermediate
3921
3922 Notes:
3923 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3924 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3925 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3926
3927 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3928
3929 The format used for the sparse matrix input is equivalent to a
3930 row-major ordering,
i.e for the following matrix, the input data expected is 3931 as shown: 3932 3933 1 0 0 3934 2 0 3 P0 3935 ------- 3936 4 5 6 P1 3937 3938 Process0 [P0]: rows_owned=[0,1] 3939 i = {0,1,3} [size = nrow+1 = 2+1] 3940 j = {0,0,2} [size = nz = 6] 3941 v = {1,2,3} [size = nz = 6] 3942 3943 Process1 [P1]: rows_owned=[2] 3944 i = {0,3} [size = nrow+1 = 1+1] 3945 j = {0,1,2} [size = nz = 6] 3946 v = {4,5,6} [size = nz = 6] 3947 3948 .keywords: matrix, aij, compressed row, sparse, parallel 3949 3950 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3951 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3952 @*/ 3953 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3954 { 3955 PetscErrorCode ierr; 3956 3957 PetscFunctionBegin; 3958 if (i[0]) { 3959 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3960 } 3961 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3962 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3963 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3964 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3965 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3966 PetscFunctionReturn(0); 3967 } 3968 3969 #undef __FUNCT__ 3970 #define __FUNCT__ "MatCreateMPIAIJ" 3971 /*@C 3972 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3973 (the default parallel PETSc format). For good matrix assembly performance 3974 the user should preallocate the matrix storage by setting the parameters 3975 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3976 performance can be increased by more than a factor of 50. 3977 3978 Collective on MPI_Comm 3979 3980 Input Parameters: 3981 + comm - MPI communicator 3982 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3983 This value should be the same as the local size used in creating the 3984 y vector for the matrix-vector product y = Ax. 3985 . n - This value should be the same as the local size used in creating the 3986 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3987 calculated if N is given) For square matrices n is almost always m. 3988 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3989 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3990 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3991 (same value is used for all local rows) 3992 . d_nnz - array containing the number of nonzeros in the various rows of the 3993 DIAGONAL portion of the local submatrix (possibly different for each row) 3994 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3995 The size of this array is equal to the number of local rows, i.e 'm'. 3996 You must leave room for the diagonal entry even if it is zero. 3997 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3998 submatrix (same value is used for all local rows). 3999 - o_nnz - array containing the number of nonzeros in the various rows of the 4000 OFF-DIAGONAL portion of the local submatrix (possibly different for 4001 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4002 structure. The size of this array is equal to the number 4003 of local rows, i.e 'm'. 
4004 4005 Output Parameter: 4006 . A - the matrix 4007 4008 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4009 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4010 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4011 4012 Notes: 4013 If the *_nnz parameter is given then the *_nz parameter is ignored 4014 4015 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4016 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4017 storage requirements for this matrix. 4018 4019 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4020 processor than it must be used on all processors that share the object for 4021 that argument. 4022 4023 The user MUST specify either the local or global matrix dimensions 4024 (possibly both). 4025 4026 The parallel matrix is partitioned across processors such that the 4027 first m0 rows belong to process 0, the next m1 rows belong to 4028 process 1, the next m2 rows belong to process 2 etc.. where 4029 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4030 values corresponding to [m x N] submatrix. 4031 4032 The columns are logically partitioned with the n0 columns belonging 4033 to 0th partition, the next n1 columns belonging to the next 4034 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4035 4036 The DIAGONAL portion of the local submatrix on any given processor 4037 is the submatrix corresponding to the rows and columns m,n 4038 corresponding to the given processor. i.e diagonal matrix on 4039 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4040 etc. The remaining portion of the local submatrix [m x (N-n)] 4041 constitute the OFF-DIAGONAL portion. The example below better 4042 illustrates this concept. 4043 4044 For a square global matrix we define each processor's diagonal portion 4045 to be its local rows and the corresponding columns (a square submatrix); 4046 each processor's off-diagonal portion encompasses the remainder of the 4047 local matrix (a rectangular submatrix). 4048 4049 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4050 4051 When calling this routine with a single process communicator, a matrix of 4052 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4053 type of communicator, use the construction mechanism: 4054 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4055 4056 By default, this format uses inodes (identical nodes) when possible. 4057 We search for consecutive rows with the same nonzero structure, thereby 4058 reusing matrix information to achieve increased efficiency. 4059 4060 Options Database Keys: 4061 + -mat_no_inode - Do not use inodes 4062 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4063 - -mat_aij_oneindex - Internally use indexing starting at 1 4064 rather than 0. Note that when calling MatSetValues(), 4065 the user still MUST index entries starting at 0! 4066 4067 4068 Example usage: 4069 4070 Consider the following 8x8 matrix with 34 non-zero values, that is 4071 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4072 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4073 as follows: 4074 4075 .vb 4076 1 2 0 | 0 3 0 | 0 4 4077 Proc0 0 5 6 | 7 0 0 | 8 0 4078 9 0 10 | 11 0 0 | 12 0 4079 ------------------------------------- 4080 13 0 14 | 15 16 17 | 0 0 4081 Proc1 0 18 0 | 19 20 21 | 0 0 4082 0 0 0 | 22 23 0 | 24 0 4083 ------------------------------------- 4084 Proc2 25 26 27 | 0 0 28 | 29 0 4085 30 0 0 | 31 32 33 | 0 34 4086 .ve 4087 4088 This can be represented as a collection of submatrices as: 4089 4090 .vb 4091 A B C 4092 D E F 4093 G H I 4094 .ve 4095 4096 Where the submatrices A,B,C are owned by proc0, D,E,F are 4097 owned by proc1, G,H,I are owned by proc2. 4098 4099 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4100 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4101 The 'M','N' parameters are 8,8, and have the same values on all procs. 4102 4103 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4104 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4105 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4106 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4107 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4108 matrix, ans [DF] as another SeqAIJ matrix. 4109 4110 When d_nz, o_nz parameters are specified, d_nz storage elements are 4111 allocated for every row of the local diagonal submatrix, and o_nz 4112 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4113 One way to choose d_nz and o_nz is to use the max nonzerors per local 4114 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4115 In this case, the values of d_nz,o_nz are: 4116 .vb 4117 proc0 : dnz = 2, o_nz = 2 4118 proc1 : dnz = 3, o_nz = 2 4119 proc2 : dnz = 1, o_nz = 4 4120 .ve 4121 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4122 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4123 for proc3. i.e we are using 12+15+10=37 storage locations to store 4124 34 values. 4125 4126 When d_nnz, o_nnz parameters are specified, the storage is specified 4127 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4128 In the above case the values for d_nnz,o_nnz are: 4129 .vb 4130 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4131 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4132 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4133 .ve 4134 Here the space allocated is sum of all the above values i.e 34, and 4135 hence pre-allocation is perfect. 
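   As a usage illustration (a sketch only, not an excerpt from a PETSc example),
   the call proc1 would make in the d_nnz/o_nnz variant above is:

.vb
      Mat            A;
      PetscInt       d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};  /* the proc1 per-row counts above */
      PetscErrorCode ierr;

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      /* followed by MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() */
.ve
   Each process passes its own local sizes and nnz arrays; proc0 and proc2 make the
   analogous calls with their values from the tables above.
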
4136 4137 Level: intermediate 4138 4139 .keywords: matrix, aij, compressed row, sparse, parallel 4140 4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4142 MPIAIJ, MatCreateMPIAIJWithArrays() 4143 @*/ 4144 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4145 { 4146 PetscErrorCode ierr; 4147 PetscMPIInt size; 4148 4149 PetscFunctionBegin; 4150 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4151 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4152 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4153 if (size > 1) { 4154 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4155 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4156 } else { 4157 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4158 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4159 } 4160 PetscFunctionReturn(0); 4161 } 4162 4163 #undef __FUNCT__ 4164 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4165 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4166 { 4167 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4168 4169 PetscFunctionBegin; 4170 *Ad = a->A; 4171 *Ao = a->B; 4172 *colmap = a->garray; 4173 PetscFunctionReturn(0); 4174 } 4175 4176 #undef __FUNCT__ 4177 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4178 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4179 { 4180 PetscErrorCode ierr; 4181 PetscInt i; 4182 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4183 4184 PetscFunctionBegin; 4185 if (coloring->ctype == IS_COLORING_GLOBAL) { 4186 ISColoringValue *allcolors,*colors; 4187 ISColoring ocoloring; 4188 4189 /* set coloring for diagonal portion */ 4190 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4191 4192 /* set coloring for off-diagonal portion */ 4193 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4194 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4195 for (i=0; i<a->B->cmap->n; i++) { 4196 colors[i] = allcolors[a->garray[i]]; 4197 } 4198 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4199 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4200 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4201 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4202 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4203 ISColoringValue *colors; 4204 PetscInt *larray; 4205 ISColoring ocoloring; 4206 4207 /* set coloring for diagonal portion */ 4208 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4209 for (i=0; i<a->A->cmap->n; i++) { 4210 larray[i] = i + A->cmap->rstart; 4211 } 4212 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4213 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4214 for (i=0; i<a->A->cmap->n; i++) { 4215 colors[i] = coloring->colors[larray[i]]; 4216 } 4217 ierr = PetscFree(larray);CHKERRQ(ierr); 4218 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4219 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4220 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4221 4222 /* set coloring for off-diagonal portion */ 4223 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4224 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4225 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4226 for (i=0; i<a->B->cmap->n; i++) { 4227 colors[i] = coloring->colors[larray[i]]; 4228 } 4229 ierr = PetscFree(larray);CHKERRQ(ierr); 4230 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4231 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4232 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4233 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4234 4235 PetscFunctionReturn(0); 4236 } 4237 4238 #if defined(PETSC_HAVE_ADIC) 4239 #undef __FUNCT__ 4240 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4241 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4242 { 4243 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4244 PetscErrorCode ierr; 4245 4246 PetscFunctionBegin; 4247 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4248 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4249 PetscFunctionReturn(0); 4250 } 4251 #endif 4252 4253 #undef __FUNCT__ 4254 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4255 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4256 { 4257 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4258 PetscErrorCode ierr; 4259 4260 PetscFunctionBegin; 4261 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4262 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4263 PetscFunctionReturn(0); 4264 } 4265 4266 #undef __FUNCT__ 4267 #define __FUNCT__ "MatMerge" 4268 /*@ 4269 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4270 matrices from each processor 4271 4272 Collective on MPI_Comm 4273 4274 Input Parameters: 4275 + comm - the communicators the parallel matrix will live on 4276 . inmat - the input sequential matrices 4277 . n - number of local columns (or PETSC_DECIDE) 4278 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4279 4280 Output Parameter: 4281 . outmat - the parallel matrix generated 4282 4283 Level: advanced 4284 4285 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
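   As a sketch of typical use (an illustration only; seqmat here stands for a MATSEQAIJ
   matrix that each process has already assembled, all with the same number of columns):

.vb
      Mat seqmat,A;
      /* ... each process builds and assembles its own seqmat ... */
      ierr = MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
.ve
   Note from the implementation below that the input sequential matrix is destroyed
   (its reference is released) inside this routine.
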
4286 4287 @*/ 4288 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4289 { 4290 PetscErrorCode ierr; 4291 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4292 PetscInt *indx; 4293 PetscScalar *values; 4294 4295 PetscFunctionBegin; 4296 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4297 if (scall == MAT_INITIAL_MATRIX){ 4298 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4299 if (n == PETSC_DECIDE){ 4300 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4301 } 4302 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4303 rstart -= m; 4304 4305 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4306 for (i=0;i<m;i++) { 4307 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4308 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4309 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4310 } 4311 /* This routine will ONLY return MPIAIJ type matrix */ 4312 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4313 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4314 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4315 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4316 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4317 4318 } else if (scall == MAT_REUSE_MATRIX){ 4319 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4320 } else { 4321 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4322 } 4323 4324 for (i=0;i<m;i++) { 4325 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4326 Ii = i + rstart; 4327 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4328 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4329 } 4330 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4331 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4332 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4333 4334 PetscFunctionReturn(0); 4335 } 4336 4337 #undef __FUNCT__ 4338 #define __FUNCT__ "MatFileSplit" 4339 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4340 { 4341 PetscErrorCode ierr; 4342 PetscMPIInt rank; 4343 PetscInt m,N,i,rstart,nnz; 4344 size_t len; 4345 const PetscInt *indx; 4346 PetscViewer out; 4347 char *name; 4348 Mat B; 4349 const PetscScalar *values; 4350 4351 PetscFunctionBegin; 4352 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4353 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4354 /* Should this be the type of the diagonal block of A? 
*/ 4355 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4356 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4357 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4358 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4359 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4360 for (i=0;i<m;i++) { 4361 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4362 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4363 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4364 } 4365 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4366 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4367 4368 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4369 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4370 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4371 sprintf(name,"%s.%d",outfile,rank); 4372 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4373 ierr = PetscFree(name); 4374 ierr = MatView(B,out);CHKERRQ(ierr); 4375 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4376 ierr = MatDestroy(&B);CHKERRQ(ierr); 4377 PetscFunctionReturn(0); 4378 } 4379 4380 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4381 #undef __FUNCT__ 4382 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4383 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4384 { 4385 PetscErrorCode ierr; 4386 Mat_Merge_SeqsToMPI *merge; 4387 PetscContainer container; 4388 4389 PetscFunctionBegin; 4390 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4391 if (container) { 4392 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4393 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4394 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4395 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4396 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4397 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4398 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4399 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4400 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4401 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4402 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4403 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4404 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4405 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4406 ierr = PetscFree(merge);CHKERRQ(ierr); 4407 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4408 } 4409 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4410 PetscFunctionReturn(0); 4411 } 4412 4413 #include <../src/mat/utils/freespace.h> 4414 #include <petscbt.h> 4415 4416 #undef __FUNCT__ 4417 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4418 /*@C 4419 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4420 matrices from each processor 4421 4422 Collective on MPI_Comm 4423 4424 Input Parameters: 4425 + comm - the communicators the parallel matrix will live on 4426 . seqmat - the input sequential matrices 4427 . m - number of local rows (or PETSC_DECIDE) 4428 . n - number of local columns (or PETSC_DECIDE) 4429 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4430 4431 Output Parameter: 4432 . mpimat - the parallel matrix generated 4433 4434 Level: advanced 4435 4436 Notes: 4437 The dimensions of the sequential matrix in each processor MUST be the same. 4438 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4439 destroyed when mpimat is destroyed. 
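   The symbolic and numeric phases are split into two routines; a sketch of the intended
   call sequence (the argument names follow the two function signatures in this file) is:

.vb
      Mat mpimat;
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve
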
Call PetscObjectQuery() to access seqmat. 4440 @*/ 4441 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4442 { 4443 PetscErrorCode ierr; 4444 MPI_Comm comm=((PetscObject)mpimat)->comm; 4445 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4446 PetscMPIInt size,rank,taga,*len_s; 4447 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4448 PetscInt proc,m; 4449 PetscInt **buf_ri,**buf_rj; 4450 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4451 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4452 MPI_Request *s_waits,*r_waits; 4453 MPI_Status *status; 4454 MatScalar *aa=a->a; 4455 MatScalar **abuf_r,*ba_i; 4456 Mat_Merge_SeqsToMPI *merge; 4457 PetscContainer container; 4458 4459 PetscFunctionBegin; 4460 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4461 4462 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4463 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4464 4465 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4466 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4467 4468 bi = merge->bi; 4469 bj = merge->bj; 4470 buf_ri = merge->buf_ri; 4471 buf_rj = merge->buf_rj; 4472 4473 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4474 owners = merge->rowmap->range; 4475 len_s = merge->len_s; 4476 4477 /* send and recv matrix values */ 4478 /*-----------------------------*/ 4479 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4480 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4481 4482 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4483 for (proc=0,k=0; proc<size; proc++){ 4484 if (!len_s[proc]) continue; 4485 i = owners[proc]; 4486 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4487 k++; 4488 } 4489 4490 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4491 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4492 ierr = PetscFree(status);CHKERRQ(ierr); 4493 4494 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4495 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4496 4497 /* insert mat values of mpimat */ 4498 /*----------------------------*/ 4499 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4500 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4501 4502 for (k=0; k<merge->nrecv; k++){ 4503 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4504 nrows = *(buf_ri_k[k]); 4505 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4506 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4507 } 4508 4509 /* set values of ba */ 4510 m = merge->rowmap->n; 4511 for (i=0; i<m; i++) { 4512 arow = owners[rank] + i; 4513 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4514 bnzi = bi[i+1] - bi[i]; 4515 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4516 4517 /* add local non-zero vals of this proc's seqmat into ba */ 4518 anzi = ai[arow+1] - ai[arow]; 4519 aj = a->j + ai[arow]; 4520 aa = a->a + ai[arow]; 4521 nextaj = 0; 4522 for (j=0; nextaj<anzi; j++){ 4523 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4524 ba_i[j] += aa[nextaj++]; 4525 } 4526 } 4527 4528 /* add received vals into ba */ 4529 for (k=0; k<merge->nrecv; k++){ /* k-th 
received message */ 4530 /* i-th row */ 4531 if (i == *nextrow[k]) { 4532 anzi = *(nextai[k]+1) - *nextai[k]; 4533 aj = buf_rj[k] + *(nextai[k]); 4534 aa = abuf_r[k] + *(nextai[k]); 4535 nextaj = 0; 4536 for (j=0; nextaj<anzi; j++){ 4537 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4538 ba_i[j] += aa[nextaj++]; 4539 } 4540 } 4541 nextrow[k]++; nextai[k]++; 4542 } 4543 } 4544 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4545 } 4546 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4547 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4548 4549 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4550 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4551 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4552 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4553 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4554 PetscFunctionReturn(0); 4555 } 4556 4557 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4558 4559 #undef __FUNCT__ 4560 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4561 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4562 { 4563 PetscErrorCode ierr; 4564 Mat B_mpi; 4565 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4566 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4567 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4568 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4569 PetscInt len,proc,*dnz,*onz; 4570 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4571 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4572 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4573 MPI_Status *status; 4574 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4575 PetscBT lnkbt; 4576 Mat_Merge_SeqsToMPI *merge; 4577 PetscContainer container; 4578 4579 PetscFunctionBegin; 4580 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4581 4582 /* make sure it is a PETSc comm */ 4583 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4584 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4585 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4586 4587 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4588 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4589 4590 /* determine row ownership */ 4591 /*---------------------------------------------------------*/ 4592 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4593 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4594 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4595 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4596 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4597 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4598 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4599 4600 m = merge->rowmap->n; 4601 M = merge->rowmap->N; 4602 owners = merge->rowmap->range; 4603 4604 /* determine the number of messages to send, their lengths */ 4605 /*---------------------------------------------------------*/ 4606 len_s = merge->len_s; 4607 4608 len = 0; /* length of buf_si[] */ 4609 merge->nsend = 0; 4610 for (proc=0; proc<size; proc++){ 4611 len_si[proc] = 0; 4612 if (proc == rank){ 4613 len_s[proc] = 0; 4614 } else { 4615 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4616 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4617 } 4618 if (len_s[proc]) { 4619 
merge->nsend++; 4620 nrows = 0; 4621 for (i=owners[proc]; i<owners[proc+1]; i++){ 4622 if (ai[i+1] > ai[i]) nrows++; 4623 } 4624 len_si[proc] = 2*(nrows+1); 4625 len += len_si[proc]; 4626 } 4627 } 4628 4629 /* determine the number and length of messages to receive for ij-structure */ 4630 /*-------------------------------------------------------------------------*/ 4631 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4632 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4633 4634 /* post the Irecv of j-structure */ 4635 /*-------------------------------*/ 4636 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4637 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4638 4639 /* post the Isend of j-structure */ 4640 /*--------------------------------*/ 4641 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4642 4643 for (proc=0, k=0; proc<size; proc++){ 4644 if (!len_s[proc]) continue; 4645 i = owners[proc]; 4646 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4647 k++; 4648 } 4649 4650 /* receives and sends of j-structure are complete */ 4651 /*------------------------------------------------*/ 4652 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4653 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4654 4655 /* send and recv i-structure */ 4656 /*---------------------------*/ 4657 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4658 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4659 4660 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4661 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4662 for (proc=0,k=0; proc<size; proc++){ 4663 if (!len_s[proc]) continue; 4664 /* form outgoing message for i-structure: 4665 buf_si[0]: nrows to be sent 4666 [1:nrows]: row index (global) 4667 [nrows+1:2*nrows+1]: i-structure index 4668 */ 4669 /*-------------------------------------------*/ 4670 nrows = len_si[proc]/2 - 1; 4671 buf_si_i = buf_si + nrows+1; 4672 buf_si[0] = nrows; 4673 buf_si_i[0] = 0; 4674 nrows = 0; 4675 for (i=owners[proc]; i<owners[proc+1]; i++){ 4676 anzi = ai[i+1] - ai[i]; 4677 if (anzi) { 4678 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4679 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4680 nrows++; 4681 } 4682 } 4683 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4684 k++; 4685 buf_si += len_si[proc]; 4686 } 4687 4688 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4689 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4690 4691 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4692 for (i=0; i<merge->nrecv; i++){ 4693 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4694 } 4695 4696 ierr = PetscFree(len_si);CHKERRQ(ierr); 4697 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4698 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4699 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4700 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4701 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4702 ierr = 
PetscFree(status);CHKERRQ(ierr);
4703
4704   /* compute a local seq matrix in each processor */
4705   /*----------------------------------------------*/
4706   /* allocate bi array and free space for accumulating nonzero column info */
4707   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4708   bi[0] = 0;
4709
4710   /* create and initialize a linked list */
4711   nlnk = N+1;
4712   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4713
4714   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4715   len = 0;
4716   len = ai[owners[rank+1]] - ai[owners[rank]];
4717   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4718   current_space = free_space;
4719
4720   /* determine symbolic info for each local row */
4721   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4722
4723   for (k=0; k<merge->nrecv; k++){
4724     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4725     nrows = *buf_ri_k[k];
4726     nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4727     nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4728   }
4729
4730   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4731   len = 0;
4732   for (i=0;i<m;i++) {
4733     bnzi = 0;
4734     /* add local non-zero cols of this proc's seqmat into lnk */
4735     arow = owners[rank] + i;
4736     anzi = ai[arow+1] - ai[arow];
4737     aj   = a->j + ai[arow];
4738     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4739     bnzi += nlnk;
4740     /* add received col data into lnk */
4741     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4742       if (i == *nextrow[k]) { /* i-th row */
4743         anzi = *(nextai[k]+1) - *nextai[k];
4744         aj   = buf_rj[k] + *nextai[k];
4745         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4746         bnzi += nlnk;
4747         nextrow[k]++; nextai[k]++;
4748       }
4749     }
4750     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4751
4752     /* if free space is not available, make more free space */
4753     if (current_space->local_remaining<bnzi) {
4754       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4755       nspacedouble++;
4756     }
4757     /* copy data into free space, then initialize lnk */
4758     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4759     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4760
4761     current_space->array += bnzi;
4762     current_space->local_used += bnzi;
4763     current_space->local_remaining -= bnzi;
4764
4765     bi[i+1] = bi[i] + bnzi;
4766   }
4767
4768   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4769
4770   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4771   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4772   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4773
4774   /* create symbolic parallel matrix B_mpi */
4775   /*---------------------------------------*/
4776   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4777   if (n==PETSC_DECIDE) {
4778     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4779   } else {
4780     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4781   }
4782   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4783   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4784   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4785
4786   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
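  /*
     Illustrative call sequence for the merge routines in this file (a minimal sketch, not
     library code; A_seq, mlocal and nlocal are hypothetical names for a sequential AIJ
     matrix already computed on every process and for the desired local sizes of the merged
     parallel matrix; ierr is assumed declared):

        Mat C;
        ierr = MatMerge_SeqsToMPI(comm,A_seq,mlocal,nlocal,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
        ierr = MatMerge_SeqsToMPI(comm,A_seq,mlocal,nlocal,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

     The first call runs MatMerge_SeqsToMPISymbolic() followed by MatMerge_SeqsToMPINumeric();
     subsequent calls with MAT_REUSE_MATRIX reuse the symbolic data attached to C below and
     redo only the numeric phase, which requires the nonzero pattern of A_seq to be unchanged.
  */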
4787   B_mpi->assembled = PETSC_FALSE;
4788   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4789   merge->bi = bi;
4790   merge->bj = bj;
4791   merge->buf_ri = buf_ri;
4792   merge->buf_rj = buf_rj;
4793   merge->coi = PETSC_NULL;
4794   merge->coj = PETSC_NULL;
4795   merge->owners_co = PETSC_NULL;
4796
4797   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4798
4799   /* attach the supporting struct to B_mpi for reuse */
4800   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4801   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4802   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4803   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4804   *mpimat = B_mpi;
4805
4806   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4807   PetscFunctionReturn(0);
4808 }
4809
4810 #undef __FUNCT__
4811 #define __FUNCT__ "MatMerge_SeqsToMPI"
4812 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4813 {
4814   PetscErrorCode ierr;
4815
4816   PetscFunctionBegin;
4817   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4818   if (scall == MAT_INITIAL_MATRIX){
4819     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4820   }
4821   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4822   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4823   PetscFunctionReturn(0);
4824 }
4825
4826 #undef __FUNCT__
4827 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4828 /*@
4829     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4830           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4831           with MatGetSize().
4832
4833     Not Collective
4834
4835    Input Parameters:
4836 +    A - the matrix
4837 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4838
4839    Output Parameter:
4840 .
A_loc - the local sequential matrix generated 4841 4842 Level: developer 4843 4844 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4845 4846 @*/ 4847 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4848 { 4849 PetscErrorCode ierr; 4850 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4851 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4852 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4853 MatScalar *aa=a->a,*ba=b->a,*cam; 4854 PetscScalar *ca; 4855 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4856 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4857 PetscBool match; 4858 4859 PetscFunctionBegin; 4860 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4861 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4862 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4863 if (scall == MAT_INITIAL_MATRIX){ 4864 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4865 ci[0] = 0; 4866 for (i=0; i<am; i++){ 4867 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4868 } 4869 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4870 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4871 k = 0; 4872 for (i=0; i<am; i++) { 4873 ncols_o = bi[i+1] - bi[i]; 4874 ncols_d = ai[i+1] - ai[i]; 4875 /* off-diagonal portion of A */ 4876 for (jo=0; jo<ncols_o; jo++) { 4877 col = cmap[*bj]; 4878 if (col >= cstart) break; 4879 cj[k] = col; bj++; 4880 ca[k++] = *ba++; 4881 } 4882 /* diagonal portion of A */ 4883 for (j=0; j<ncols_d; j++) { 4884 cj[k] = cstart + *aj++; 4885 ca[k++] = *aa++; 4886 } 4887 /* off-diagonal portion of A */ 4888 for (j=jo; j<ncols_o; j++) { 4889 cj[k] = cmap[*bj++]; 4890 ca[k++] = *ba++; 4891 } 4892 } 4893 /* put together the new matrix */ 4894 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4895 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4896 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4897 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4898 mat->free_a = PETSC_TRUE; 4899 mat->free_ij = PETSC_TRUE; 4900 mat->nonew = 0; 4901 } else if (scall == MAT_REUSE_MATRIX){ 4902 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4903 ci = mat->i; cj = mat->j; cam = mat->a; 4904 for (i=0; i<am; i++) { 4905 /* off-diagonal portion of A */ 4906 ncols_o = bi[i+1] - bi[i]; 4907 for (jo=0; jo<ncols_o; jo++) { 4908 col = cmap[*bj]; 4909 if (col >= cstart) break; 4910 *cam++ = *ba++; bj++; 4911 } 4912 /* diagonal portion of A */ 4913 ncols_d = ai[i+1] - ai[i]; 4914 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4915 /* off-diagonal portion of A */ 4916 for (j=jo; j<ncols_o; j++) { 4917 *cam++ = *ba++; bj++; 4918 } 4919 } 4920 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4921 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4922 PetscFunctionReturn(0); 4923 } 4924 4925 #undef __FUNCT__ 4926 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4927 /*@C 4928 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4929 4930 Not Collective 4931 4932 Input Parameters: 4933 + A - the matrix 4934 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4935 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4936 4937 Output Parameter: 4938 . 
A_loc - the local sequential matrix generated 4939 4940 Level: developer 4941 4942 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4943 4944 @*/ 4945 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4946 { 4947 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4948 PetscErrorCode ierr; 4949 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4950 IS isrowa,iscola; 4951 Mat *aloc; 4952 PetscBool match; 4953 4954 PetscFunctionBegin; 4955 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4956 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4957 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4958 if (!row){ 4959 start = A->rmap->rstart; end = A->rmap->rend; 4960 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4961 } else { 4962 isrowa = *row; 4963 } 4964 if (!col){ 4965 start = A->cmap->rstart; 4966 cmap = a->garray; 4967 nzA = a->A->cmap->n; 4968 nzB = a->B->cmap->n; 4969 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4970 ncols = 0; 4971 for (i=0; i<nzB; i++) { 4972 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4973 else break; 4974 } 4975 imark = i; 4976 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4977 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4978 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4979 } else { 4980 iscola = *col; 4981 } 4982 if (scall != MAT_INITIAL_MATRIX){ 4983 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4984 aloc[0] = *A_loc; 4985 } 4986 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4987 *A_loc = aloc[0]; 4988 ierr = PetscFree(aloc);CHKERRQ(ierr); 4989 if (!row){ 4990 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4991 } 4992 if (!col){ 4993 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4994 } 4995 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4996 PetscFunctionReturn(0); 4997 } 4998 4999 #undef __FUNCT__ 5000 #define __FUNCT__ "MatGetBrowsOfAcols" 5001 /*@C 5002 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5003 5004 Collective on Mat 5005 5006 Input Parameters: 5007 + A,B - the matrices in mpiaij format 5008 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5009 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5010 5011 Output Parameter: 5012 + rowb, colb - index sets of rows and columns of B to extract 5013 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 5014 - B_seq - the sequential matrix generated 5015 5016 Level: developer 5017 5018 @*/ 5019 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 5020 { 5021 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5022 PetscErrorCode ierr; 5023 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5024 IS isrowb,iscolb; 5025 Mat *bseq; 5026 5027 PetscFunctionBegin; 5028 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5029 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5030 } 5031 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5032 5033 if (scall == MAT_INITIAL_MATRIX){ 5034 start = A->cmap->rstart; 5035 cmap = a->garray; 5036 nzA = a->A->cmap->n; 5037 nzB = a->B->cmap->n; 5038 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5039 ncols = 0; 5040 for (i=0; i<nzB; i++) { /* row < local row index */ 5041 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5042 else break; 5043 } 5044 imark = i; 5045 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5046 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5047 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5048 *brstart = imark; 5049 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5050 } else { 5051 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5052 isrowb = *rowb; iscolb = *colb; 5053 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5054 bseq[0] = *B_seq; 5055 } 5056 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5057 *B_seq = bseq[0]; 5058 ierr = PetscFree(bseq);CHKERRQ(ierr); 5059 if (!rowb){ 5060 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5061 } else { 5062 *rowb = isrowb; 5063 } 5064 if (!colb){ 5065 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5066 } else { 5067 *colb = iscolb; 5068 } 5069 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5070 PetscFunctionReturn(0); 5071 } 5072 5073 #undef __FUNCT__ 5074 #define __FUNCT__ "MatGetBrowsOfAoCols" 5075 /*@C 5076 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5077 of the OFF-DIAGONAL portion of local A 5078 5079 Collective on Mat 5080 5081 Input Parameters: 5082 + A,B - the matrices in mpiaij format 5083 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5084 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5085 . 
startsj_r - similar to startsj for receives 5086 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5087 5088 Output Parameter: 5089 + B_oth - the sequential matrix generated 5090 5091 Level: developer 5092 5093 @*/ 5094 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5095 { 5096 VecScatter_MPI_General *gen_to,*gen_from; 5097 PetscErrorCode ierr; 5098 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5099 Mat_SeqAIJ *b_oth; 5100 VecScatter ctx=a->Mvctx; 5101 MPI_Comm comm=((PetscObject)ctx)->comm; 5102 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5103 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5104 PetscScalar *rvalues,*svalues; 5105 MatScalar *b_otha,*bufa,*bufA; 5106 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5107 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5108 MPI_Status *sstatus,rstatus; 5109 PetscMPIInt jj; 5110 PetscInt *cols,sbs,rbs; 5111 PetscScalar *vals; 5112 5113 PetscFunctionBegin; 5114 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5115 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5116 } 5117 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5118 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5119 5120 gen_to = (VecScatter_MPI_General*)ctx->todata; 5121 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5122 rvalues = gen_from->values; /* holds the length of receiving row */ 5123 svalues = gen_to->values; /* holds the length of sending row */ 5124 nrecvs = gen_from->n; 5125 nsends = gen_to->n; 5126 5127 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5128 srow = gen_to->indices; /* local row index to be sent */ 5129 sstarts = gen_to->starts; 5130 sprocs = gen_to->procs; 5131 sstatus = gen_to->sstatus; 5132 sbs = gen_to->bs; 5133 rstarts = gen_from->starts; 5134 rprocs = gen_from->procs; 5135 rbs = gen_from->bs; 5136 5137 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5138 if (scall == MAT_INITIAL_MATRIX){ 5139 /* i-array */ 5140 /*---------*/ 5141 /* post receives */ 5142 for (i=0; i<nrecvs; i++){ 5143 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5144 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5145 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5146 } 5147 5148 /* pack the outgoing message */ 5149 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5150 sstartsj[0] = 0; rstartsj[0] = 0; 5151 len = 0; /* total length of j or a array to be sent */ 5152 k = 0; 5153 for (i=0; i<nsends; i++){ 5154 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5155 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5156 for (j=0; j<nrows; j++) { 5157 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5158 for (l=0; l<sbs; l++){ 5159 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5160 rowlen[j*sbs+l] = ncols; 5161 len += ncols; 5162 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5163 } 5164 k++; 5165 } 5166 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5167 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and 
bufa */
5168     }
5169     /* recvs and sends of i-array are completed */
5170     i = nrecvs;
5171     while (i--) {
5172       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5173     }
5174     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5175
5176     /* allocate buffers for sending j and a arrays */
5177     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5178     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5179
5180     /* create i-array of B_oth */
5181     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5182     b_othi[0] = 0;
5183     len = 0; /* total length of j or a array to be received */
5184     k = 0;
5185     for (i=0; i<nrecvs; i++){
5186       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5187       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5188       for (j=0; j<nrows; j++) {
5189         b_othi[k+1] = b_othi[k] + rowlen[j];
5190         len += rowlen[j]; k++;
5191       }
5192       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5193     }
5194
5195     /* allocate space for j and a arrays of B_oth */
5196     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5197     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5198
5199     /* j-array */
5200     /*---------*/
5201     /* post receives of j-array */
5202     for (i=0; i<nrecvs; i++){
5203       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5204       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5205     }
5206
5207     /* pack the outgoing message j-array */
5208     k = 0;
5209     for (i=0; i<nsends; i++){
5210       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5211       bufJ = bufj+sstartsj[i];
5212       for (j=0; j<nrows; j++) {
5213         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5214         for (ll=0; ll<sbs; ll++){
5215           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5216           for (l=0; l<ncols; l++){
5217             *bufJ++ = cols[l];
5218           }
5219           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5220         }
5221       }
5222       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5223     }
5224
5225     /* recvs and sends of j-array are completed */
5226     i = nrecvs;
5227     while (i--) {
5228       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5229     }
5230     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5231   } else if (scall == MAT_REUSE_MATRIX){
5232     sstartsj = *startsj;
5233     rstartsj = *startsj_r;
5234     bufa = *bufa_ptr;
5235     b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5236     b_otha = b_oth->a;
5237   } else {
5238     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5239   }
5240
5241   /* a-array */
5242   /*---------*/
5243   /* post receives of a-array */
5244   for (i=0; i<nrecvs; i++){
5245     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5246     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5247   }
5248
5249   /* pack the outgoing message a-array */
5250   k = 0;
5251   for (i=0; i<nsends; i++){
5252     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5253     bufA = bufa+sstartsj[i];
5254     for (j=0; j<nrows; j++) {
5255       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5256       for (ll=0; ll<sbs; ll++){
5257         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5258         for (l=0; l<ncols; l++){
5259           *bufA++ = vals[l];
5260         }
5261         ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5262 } 5263 } 5264 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5265 } 5266 /* recvs and sends of a-array are completed */ 5267 i = nrecvs; 5268 while (i--) { 5269 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5270 } 5271 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5272 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5273 5274 if (scall == MAT_INITIAL_MATRIX){ 5275 /* put together the new matrix */ 5276 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5277 5278 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5279 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5280 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5281 b_oth->free_a = PETSC_TRUE; 5282 b_oth->free_ij = PETSC_TRUE; 5283 b_oth->nonew = 0; 5284 5285 ierr = PetscFree(bufj);CHKERRQ(ierr); 5286 if (!startsj || !bufa_ptr){ 5287 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5288 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5289 } else { 5290 *startsj = sstartsj; 5291 *startsj_r = rstartsj; 5292 *bufa_ptr = bufa; 5293 } 5294 } 5295 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5296 PetscFunctionReturn(0); 5297 } 5298 5299 #undef __FUNCT__ 5300 #define __FUNCT__ "MatGetCommunicationStructs" 5301 /*@C 5302 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5303 5304 Not Collective 5305 5306 Input Parameters: 5307 . A - The matrix in mpiaij format 5308 5309 Output Parameter: 5310 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5311 . 
colmap - A map from global column index to local index into lvec 5312 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5313 5314 Level: developer 5315 5316 @*/ 5317 #if defined (PETSC_USE_CTABLE) 5318 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5319 #else 5320 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5321 #endif 5322 { 5323 Mat_MPIAIJ *a; 5324 5325 PetscFunctionBegin; 5326 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5327 PetscValidPointer(lvec, 2); 5328 PetscValidPointer(colmap, 3); 5329 PetscValidPointer(multScatter, 4); 5330 a = (Mat_MPIAIJ *) A->data; 5331 if (lvec) *lvec = a->lvec; 5332 if (colmap) *colmap = a->colmap; 5333 if (multScatter) *multScatter = a->Mvctx; 5334 PetscFunctionReturn(0); 5335 } 5336 5337 EXTERN_C_BEGIN 5338 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5339 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5340 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5341 EXTERN_C_END 5342 5343 #undef __FUNCT__ 5344 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5345 /* 5346 Computes (B'*A')' since computing B*A directly is untenable 5347 5348 n p p 5349 ( ) ( ) ( ) 5350 m ( A ) * n ( B ) = m ( C ) 5351 ( ) ( ) ( ) 5352 5353 */ 5354 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5355 { 5356 PetscErrorCode ierr; 5357 Mat At,Bt,Ct; 5358 5359 PetscFunctionBegin; 5360 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5361 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5362 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5363 ierr = MatDestroy(&At);CHKERRQ(ierr); 5364 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5365 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5366 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5367 PetscFunctionReturn(0); 5368 } 5369 5370 #undef __FUNCT__ 5371 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5372 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5373 { 5374 PetscErrorCode ierr; 5375 PetscInt m=A->rmap->n,n=B->cmap->n; 5376 Mat Cmat; 5377 5378 PetscFunctionBegin; 5379 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5380 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5381 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5382 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5383 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5384 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5385 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5386 *C = Cmat; 5387 PetscFunctionReturn(0); 5388 } 5389 5390 /* ----------------------------------------------------------------*/ 5391 #undef __FUNCT__ 5392 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5393 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5394 { 5395 PetscErrorCode ierr; 5396 5397 PetscFunctionBegin; 5398 if (scall == MAT_INITIAL_MATRIX){ 5399 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5400 } 5401 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5402 PetscFunctionReturn(0); 5403 } 5404 5405 EXTERN_C_BEGIN 5406 #if defined(PETSC_HAVE_MUMPS) 5407 extern PetscErrorCode 
MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5408 #endif 5409 #if defined(PETSC_HAVE_PASTIX) 5410 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5411 #endif 5412 #if defined(PETSC_HAVE_SUPERLU_DIST) 5413 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5414 #endif 5415 #if defined(PETSC_HAVE_SPOOLES) 5416 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5417 #endif 5418 EXTERN_C_END 5419 5420 /*MC 5421 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5422 5423 Options Database Keys: 5424 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5425 5426 Level: beginner 5427 5428 .seealso: MatCreateMPIAIJ() 5429 M*/ 5430 5431 EXTERN_C_BEGIN 5432 #undef __FUNCT__ 5433 #define __FUNCT__ "MatCreate_MPIAIJ" 5434 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5435 { 5436 Mat_MPIAIJ *b; 5437 PetscErrorCode ierr; 5438 PetscMPIInt size; 5439 5440 PetscFunctionBegin; 5441 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5442 5443 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5444 B->data = (void*)b; 5445 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5446 B->rmap->bs = 1; 5447 B->assembled = PETSC_FALSE; 5448 5449 B->insertmode = NOT_SET_VALUES; 5450 b->size = size; 5451 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5452 5453 /* build cache for off array entries formed */ 5454 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5455 b->donotstash = PETSC_FALSE; 5456 b->colmap = 0; 5457 b->garray = 0; 5458 b->roworiented = PETSC_TRUE; 5459 5460 /* stuff used for matrix vector multiply */ 5461 b->lvec = PETSC_NULL; 5462 b->Mvctx = PETSC_NULL; 5463 5464 /* stuff for MatGetRow() */ 5465 b->rowindices = 0; 5466 b->rowvalues = 0; 5467 b->getrowactive = PETSC_FALSE; 5468 5469 #if defined(PETSC_HAVE_SPOOLES) 5470 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5471 "MatGetFactor_mpiaij_spooles", 5472 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5473 #endif 5474 #if defined(PETSC_HAVE_MUMPS) 5475 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5476 "MatGetFactor_aij_mumps", 5477 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5478 #endif 5479 #if defined(PETSC_HAVE_PASTIX) 5480 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5481 "MatGetFactor_mpiaij_pastix", 5482 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5483 #endif 5484 #if defined(PETSC_HAVE_SUPERLU_DIST) 5485 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5486 "MatGetFactor_mpiaij_superlu_dist", 5487 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5488 #endif 5489 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5490 "MatStoreValues_MPIAIJ", 5491 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5492 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5493 "MatRetrieveValues_MPIAIJ", 5494 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5496 "MatGetDiagonalBlock_MPIAIJ", 5497 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5499 "MatIsTranspose_MPIAIJ", 5500 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5501 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5502 "MatMPIAIJSetPreallocation_MPIAIJ", 5503 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5504 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5505 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5506 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5507 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5508 "MatDiagonalScaleLocal_MPIAIJ", 5509 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5511 "MatConvert_MPIAIJ_MPIAIJPERM", 5512 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5513 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5514 "MatConvert_MPIAIJ_MPIAIJCRL", 5515 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5516 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5517 "MatConvert_MPIAIJ_MPISBAIJ", 5518 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5519 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5520 "MatMatMult_MPIDense_MPIAIJ", 5521 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5522 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5523 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5524 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5525 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5526 "MatMatMultNumeric_MPIDense_MPIAIJ", 5527 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5528 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5529 PetscFunctionReturn(0); 5530 } 5531 EXTERN_C_END 5532 5533 #undef __FUNCT__ 5534 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5535 /*@ 5536 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5537 and "off-diagonal" part of the matrix in CSR format. 5538 5539 Collective on MPI_Comm 5540 5541 Input Parameters: 5542 + comm - MPI communicator 5543 . m - number of local rows (Cannot be PETSC_DECIDE) 5544 . n - This value should be the same as the local size used in creating the 5545 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5546 calculated if N is given) For square matrices n is almost always m. 5547 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5548 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5549 . i - row indices for "diagonal" portion of matrix 5550 . j - column indices 5551 . a - matrix values 5552 . oi - row indices for "off-diagonal" portion of matrix 5553 . oj - column indices 5554 - oa - matrix values 5555 5556 Output Parameter: 5557 . mat - the matrix 5558 5559 Level: advanced 5560 5561 Notes: 5562 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5563 must free the arrays once the matrix has been destroyed and not before. 5564 5565 The i and j indices are 0 based 5566 5567 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5568 5569 This sets local rows and cannot be used to set off-processor values. 5570 5571 You cannot later use MatSetValues() to change values in this matrix. 
5572 5573 .keywords: matrix, aij, compressed row, sparse, parallel 5574 5575 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5576 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5577 @*/ 5578 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5579 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5580 { 5581 PetscErrorCode ierr; 5582 Mat_MPIAIJ *maij; 5583 5584 PetscFunctionBegin; 5585 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5586 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5587 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5588 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5589 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5590 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5591 maij = (Mat_MPIAIJ*) (*mat)->data; 5592 maij->donotstash = PETSC_TRUE; 5593 (*mat)->preallocated = PETSC_TRUE; 5594 5595 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5596 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5597 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5598 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5599 5600 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5601 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5602 5603 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5604 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5605 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5606 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5607 5608 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5609 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5610 PetscFunctionReturn(0); 5611 } 5612 5613 /* 5614 Special version for direct calls from Fortran 5615 */ 5616 #include <private/fortranimpl.h> 5617 5618 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5619 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5620 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5621 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5622 #endif 5623 5624 /* Change these macros so can be used in void function */ 5625 #undef CHKERRQ 5626 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5627 #undef SETERRQ2 5628 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5629 #undef SETERRQ 5630 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5631 5632 EXTERN_C_BEGIN 5633 #undef __FUNCT__ 5634 #define __FUNCT__ "matsetvaluesmpiaij_" 5635 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5636 { 5637 Mat mat = *mmat; 5638 PetscInt m = *mm, n = *mn; 5639 InsertMode addv = *maddv; 5640 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5641 PetscScalar value; 5642 PetscErrorCode ierr; 5643 5644 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5645 if (mat->insertmode == NOT_SET_VALUES) { 5646 mat->insertmode = addv; 5647 } 5648 #if defined(PETSC_USE_DEBUG) 5649 else if (mat->insertmode != addv) { 5650 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5651 } 5652 #endif 5653 { 5654 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5655 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5656 PetscBool roworiented = aij->roworiented; 5657 5658 /* Some Variables required in the macro */ 5659 Mat A = aij->A; 5660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5661 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5662 MatScalar *aa = a->a; 5663 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5664 Mat B = aij->B; 5665 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5666 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5667 MatScalar *ba = b->a; 5668 5669 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5670 PetscInt nonew = a->nonew; 5671 MatScalar *ap1,*ap2; 5672 5673 PetscFunctionBegin; 5674 for (i=0; i<m; i++) { 5675 if (im[i] < 0) continue; 5676 #if defined(PETSC_USE_DEBUG) 5677 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5678 #endif 5679 if (im[i] >= rstart && im[i] < rend) { 5680 row = im[i] - rstart; 5681 lastcol1 = -1; 5682 rp1 = aj + ai[row]; 5683 ap1 = aa + ai[row]; 5684 rmax1 = aimax[row]; 5685 nrow1 = ailen[row]; 5686 low1 = 0; 5687 high1 = nrow1; 5688 lastcol2 = -1; 5689 rp2 = bj + bi[row]; 5690 ap2 = ba + bi[row]; 5691 rmax2 = bimax[row]; 5692 nrow2 = bilen[row]; 5693 low2 = 0; 5694 high2 = nrow2; 5695 5696 for (j=0; j<n; j++) { 5697 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5698 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5699 if (in[j] >= cstart && in[j] < cend){ 5700 col = in[j] - cstart; 5701 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5702 } else if (in[j] < 0) continue; 5703 #if defined(PETSC_USE_DEBUG) 5704 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5705 #endif 5706 else { 5707 if (mat->was_assembled) { 5708 if (!aij->colmap) { 5709 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5710 } 5711 #if defined (PETSC_USE_CTABLE) 5712 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5713 col--; 5714 #else 5715 col = aij->colmap[in[j]] - 1; 5716 #endif 5717 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5718 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5719 col = in[j]; 5720 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5721 B = aij->B; 5722 b = (Mat_SeqAIJ*)B->data; 5723 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5724 rp2 = bj + bi[row]; 5725 ap2 = ba + bi[row]; 5726 rmax2 = bimax[row]; 5727 nrow2 = bilen[row]; 5728 low2 = 0; 5729 high2 = nrow2; 5730 bm = aij->B->rmap->n; 5731 ba = b->a; 5732 } 5733 } else col = in[j]; 5734 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5735 } 5736 } 5737 } else { 5738 if (!aij->donotstash) { 5739 if (roworiented) { 5740 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5741 } else { 5742 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5743 } 5744 } 5745 } 5746 }} 5747 PetscFunctionReturnVoid(); 5748 } 5749 EXTERN_C_END 5750 5751
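/*
   Illustrative example for MatCreateMPIAIJWithSplitArrays() documented above (a minimal
   sketch, not part of the library; the 4x4 matrix, the two-process layout, and all array
   values are made up for illustration, and ierr is assumed declared). Rank 0 owns rows and
   columns 0-1, rank 1 owns rows and columns 2-3. The "diagonal" j array uses local column
   indices, while the "off-diagonal" oj array uses global column indices, as described in
   the manual page; the arrays must stay allocated until the matrix is destroyed.

      Mat         A;
      PetscMPIInt rank;
      PetscInt    di[3] = {0,1,2},  dj[2]  = {0,1};
      PetscInt    oi[3] = {0,1,2},  oj0[2] = {2,3}, oj1[2] = {0,1};
      PetscScalar da0[2] = {1.0,2.0}, da1[2] = {3.0,4.0};
      PetscScalar oa0[2] = {5.0,6.0}, oa1[2] = {7.0,8.0};

      ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      if (!rank) {
        ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,di,dj,da0,oi,oj0,oa0,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,di,dj,da1,oi,oj1,oa1,&A);CHKERRQ(ierr);
      }
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/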