1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j<nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for (j=0; j<na; j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray =
aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 
PetscFunctionBegin; 432 /* code only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew = a->nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 506 } else if (in[j] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 509 #endif 510 else { 511 if (mat->was_assembled) { 512 if (!aij->colmap) { 513 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 517 col--; 518 #else 519 col = aij->colmap[in[j]] - 1; 520 #endif 521 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 col = in[j]; 524 /* Reinitialize the variables required by 
MatSetValues_SeqAIJ_B_Private() */ 525 B = aij->B; 526 b = (Mat_SeqAIJ*)B->data; 527 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 528 rp2 = bj + bi[row]; 529 ap2 = ba + bi[row]; 530 rmax2 = bimax[row]; 531 nrow2 = bilen[row]; 532 low2 = 0; 533 high2 = nrow2; 534 bm = aij->B->rmap->n; 535 ba = b->a; 536 } 537 } else col = in[j]; 538 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 539 } 540 } 541 } else { 542 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 543 if (!aij->donotstash) { 544 if (roworiented) { 545 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 546 } else { 547 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatGetValues_MPIAIJ" 557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 560 PetscErrorCode ierr; 561 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 562 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 563 564 PetscFunctionBegin; 565 for (i=0; i<m; i++) { 566 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 567 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 568 if (idxm[i] >= rstart && idxm[i] < rend) { 569 row = idxm[i] - rstart; 570 for (j=0; j<n; j++) { 571 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 572 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 573 if (idxn[j] >= cstart && idxn[j] < cend){ 574 col = idxn[j] - cstart; 575 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 576 } else { 577 if (!aij->colmap) { 578 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 579 } 580 #if defined (PETSC_USE_CTABLE) 581 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 582 col --; 583 #else 584 col = aij->colmap[idxn[j]] - 1; 585 #endif 586 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 587 else { 588 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else { 593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (aij->donotstash || mat->nooffprocentries) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) 
SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 618 mat->insertmode = addv; /* in case this processor had no cache */ 619 620 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 621 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 622 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 632 PetscErrorCode ierr; 633 PetscMPIInt n; 634 PetscInt i,j,rstart,ncols,flg; 635 PetscInt *row,*col; 636 PetscBool other_disassembled; 637 PetscScalar *val; 638 InsertMode addv = mat->insertmode; 639 640 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 641 PetscFunctionBegin; 642 if (!aij->donotstash && !mat->nooffprocentries) { 643 while (1) { 644 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 645 if (!flg) break; 646 647 for (i=0; i<n;) { 648 /* Now identify the consecutive vals belonging to the same row */ 649 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 650 if (j < n) ncols = j-i; 651 else ncols = n-i; 652 /* Now assemble all these values with a single function call */ 653 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 654 i = j; 655 } 656 } 657 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 658 } 659 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 660 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 661 662 /* determine if any processor has disassembled, if so we must 663 also disassemble ourselfs, in order that we may reassemble. 
*/ 664 /* 665 if nonzero structure of submatrix B cannot change then we know that 666 no processor disassembled thus we can skip this stuff 667 */ 668 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 669 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 670 if (mat->was_assembled && !other_disassembled) { 671 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 672 } 673 } 674 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 675 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 681 682 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 683 aij->rowvalues = 0; 684 685 /* used by MatAXPY() */ 686 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 687 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 688 689 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 690 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else 
{ 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 
839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 876 { 877 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 880 PetscInt i,*owners = A->rmap->range; 881 PetscInt *nprocs,j,idx,nsends; 882 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 883 PetscInt *rvalues,count,base,slen,*source; 884 PetscInt *lens,*lrows,*values,m; 885 MPI_Comm comm = ((PetscObject)A)->comm; 886 MPI_Request *send_waits,*recv_waits; 887 MPI_Status recv_status,*send_status; 888 const PetscScalar *xx; 889 PetscScalar *bb,*mask; 890 Vec xmask,lmask; 891 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 892 const PetscInt *aj, *ii,*ridx; 893 PetscScalar *aa; 894 #if defined(PETSC_DEBUG) 895 PetscBool found = PETSC_FALSE; 896 #endif 897 898 PetscFunctionBegin; 899 /* first count number of contributors to each processor */ 900 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 901 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 902 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 903 j = 0; 904 for (i=0; i<N; i++) { 905 if (lastidx > (idx = rows[i])) j = 0; 906 lastidx = idx; 907 for (; j<size; j++) { 908 if (idx >= owners[j] && idx < owners[j+1]) { 909 nprocs[2*j]++; 910 nprocs[2*j+1] = 1; 911 owner[i] = j; 912 #if defined(PETSC_DEBUG) 913 found = PETSC_TRUE; 914 #endif 915 break; 916 } 917 } 918 #if defined(PETSC_DEBUG) 919 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 920 found = PETSC_FALSE; 921 #endif 922 } 923 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 924 925 /* inform other processors of number of messages and max length*/ 926 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 927 928 /* post receives: */ 929 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 930 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 931 for (i=0; i<nrecvs; i++) { 932 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 933 } 934 935 /* do sends: 936 1) starts[i] gives the starting index in svalues for stuff going to 937 the ith processor 938 */ 939 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 940 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 941 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 942 starts[0] = 0; 943 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 944 for (i=0; i<N; i++) { 945 svalues[starts[owner[i]]++] = rows[i]; 946 } 947 948 starts[0] = 0; 949 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 950 count = 0; 951 for (i=0; i<size; i++) { 952 if (nprocs[2*i+1]) { 953 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 954 } 955 } 956 ierr = PetscFree(starts);CHKERRQ(ierr); 957 958 base = owners[rank]; 959 960 /* wait on receives */ 961 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 962 count = nrecvs; slen = 0; 963 while (count) { 964 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 965 /* unpack receives into our local space */ 966 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 967 source[imdex] = recv_status.MPI_SOURCE; 968 lens[imdex] = n; 969 slen += n; 970 count--; 971 } 972 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 973 974 /* move the data into the send scatter */ 975 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 976 count = 0; 977 for (i=0; i<nrecvs; i++) { 978 values = rvalues + i*nmax; 979 for (j=0; j<lens[i]; j++) { 980 lrows[count++] = values[j] - base; 981 } 982 } 983 ierr = PetscFree(rvalues);CHKERRQ(ierr); 984 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 985 ierr = PetscFree(owner);CHKERRQ(ierr); 986 ierr = PetscFree(nprocs);CHKERRQ(ierr); 987 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 988 989 /* zero diagonal part of matrix */ 990 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 991 992 /* handle off diagonal part of matrix */ 993 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 994 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 995 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 996 for (i=0; i<slen; i++) { 997 bb[lrows[i]] = 1; 998 } 999 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1003 if (x) { 1004 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1007 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1008 } 1009 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1010 1011 /* remove zeroed rows of off diagonal matrix */ 1012 ii = aij->i; 1013 for (i=0; i<slen; i++) { 1014 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1015 } 1016 1017 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1018 if (aij->compressedrow.use){ 1019 m = aij->compressedrow.nrows; 1020 ii = 
aij->compressedrow.i; 1021 ridx = aij->compressedrow.rindex; 1022 for (i=0; i<m; i++){ 1023 n = ii[i+1] - ii[i]; 1024 aj = aij->j + ii[i]; 1025 aa = aij->a + ii[i]; 1026 1027 for (j=0; j<n; j++) { 1028 if (PetscAbsScalar(mask[*aj])) { 1029 if (b) bb[*ridx] -= *aa*xx[*aj]; 1030 *aa = 0.0; 1031 } 1032 aa++; 1033 aj++; 1034 } 1035 ridx++; 1036 } 1037 } else { /* do not use compressed row format */ 1038 m = l->B->rmap->n; 1039 for (i=0; i<m; i++) { 1040 n = ii[i+1] - ii[i]; 1041 aj = aij->j + ii[i]; 1042 aa = aij->a + ii[i]; 1043 for (j=0; j<n; j++) { 1044 if (PetscAbsScalar(mask[*aj])) { 1045 if (b) bb[i] -= *aa*xx[*aj]; 1046 *aa = 0.0; 1047 } 1048 aa++; 1049 aj++; 1050 } 1051 } 1052 } 1053 if (x) { 1054 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1056 } 1057 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1058 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1059 ierr = PetscFree(lrows);CHKERRQ(ierr); 1060 1061 /* wait on sends */ 1062 if (nsends) { 1063 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1064 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1065 ierr = PetscFree(send_status);CHKERRQ(ierr); 1066 } 1067 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1068 ierr = PetscFree(svalues);CHKERRQ(ierr); 1069 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatMult_MPIAIJ" 1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 PetscInt nt; 1080 1081 PetscFunctionBegin; 1082 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1083 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1084 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1094 { 1095 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 PetscBool merged; 1125 1126 PetscFunctionBegin; 1127 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1128 /* do nondiagonal part */ 1129 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1130 if (!merged) { 1131 /* 
send it on its way */ 1132 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1133 /* do local part */ 1134 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1135 /* receive remote parts: note this assumes the values are not actually */ 1136 /* added in yy until the next line, */ 1137 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 } else { 1139 /* do local part */ 1140 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1141 /* send it on its way */ 1142 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 /* values actually were received in the Begin() but we need to call this nop */ 1144 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 EXTERN_C_BEGIN 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1152 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1153 { 1154 MPI_Comm comm; 1155 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1156 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1157 IS Me,Notme; 1158 PetscErrorCode ierr; 1159 PetscInt M,N,first,last,*notme,i; 1160 PetscMPIInt size; 1161 1162 PetscFunctionBegin; 1163 1164 /* Easy test: symmetric diagonal block */ 1165 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1166 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1167 if (!*f) PetscFunctionReturn(0); 1168 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1169 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1170 if (size == 1) PetscFunctionReturn(0); 1171 1172 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1173 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1174 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1175 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr); 1176 for (i=0; i<first; i++) notme[i] = i; 1177 for (i=last; i<M; i++) notme[i-last+first] = i; 1178 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1179 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1180 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1181 Aoff = Aoffs[0]; 1182 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1183 Boff = Boffs[0]; 1184 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1185 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1186 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1187 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1188 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1189 ierr = PetscFree(notme);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 EXTERN_C_END 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1197 { 1198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 /* do nondiagonal part */ 1203 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1204 /* send it on its way */ 1205 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1206 /* do local part */ 1207 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1208 /* receive remote parts */ 1209 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 }
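/*
   Editor's note: the kernels above (MatMult_MPIAIJ(), MatMultTranspose_MPIAIJ(), MatMultTransposeAdd_MPIAIJ())
   are normally reached through the public MatMult()/MatMultTranspose() interface rather than called directly.
   The following is an illustrative user-level sketch, not part of this source file, showing how such a matrix is
   typically created, preallocated (calling both preallocation routines as the MATAIJ man page above recommends),
   assembled, and applied.  The matrix size n, the tridiagonal stencil, and the variable names are arbitrary
   choices made for the example.

      #include <petscmat.h>

      int main(int argc,char **argv)
      {
        Mat            A;
        Vec            x,y;
        PetscInt       i,rstart,rend,ncols,cols[3],n = 100;
        PetscScalar    vals[3];
        PetscErrorCode ierr;

        ierr = PetscInitialize(&argc,&argv,0,0);CHKERRQ(ierr);
        ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
        ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
        ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
        ierr = MatSeqAIJSetPreallocation(A,3,PETSC_NULL);CHKERRQ(ierr);
        ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
        ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
        for (i=rstart; i<rend; i++) {
          ncols = 0;
          if (i > 0)   {cols[ncols] = i-1; vals[ncols] = -1.0; ncols++;}
          cols[ncols] = i; vals[ncols] = 2.0; ncols++;
          if (i < n-1) {cols[ncols] = i+1; vals[ncols] = -1.0; ncols++;}
          ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
        }
        ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
        ierr = VecSet(x,1.0);CHKERRQ(ierr);
        ierr = MatMult(A,x,y);CHKERRQ(ierr);
        ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);
        ierr = VecDestroy(&x);CHKERRQ(ierr);
        ierr = VecDestroy(&y);CHKERRQ(ierr);
        ierr = MatDestroy(&A);CHKERRQ(ierr);
        ierr = PetscFinalize();
        return 0;
      }

   On a single process MatSetType(A,MATAIJ) builds a MATSEQAIJ matrix; on several processes it builds the
   MATMPIAIJ representation implemented in this file, and MatMult()/MatMultTranspose() then dispatch to
   MatMult_MPIAIJ() and MatMultTranspose_MPIAIJ() above, which overlap communication of the ghost values in
   a->lvec with the multiply by the local diagonal block a->A.
*/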
1212 1213 /* 1214 This only works correctly for square matrices where the subblock A->A is the 1215 diagonal block 1216 */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1220 { 1221 PetscErrorCode ierr; 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 1224 PetscFunctionBegin; 1225 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1226 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1227 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatScale_MPIAIJ" 1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1234 { 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1240 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatDestroy_MPIAIJ" 1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1247 { 1248 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 #if defined(PETSC_USE_LOG) 1253 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1254 #endif 1255 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1256 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1257 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1258 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1259 #if defined (PETSC_USE_CTABLE) 1260 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1261 #else 1262 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1263 #endif 1264 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1266 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1267 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1268 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1269 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1270 1271 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1286 { 1287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1288 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1289 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1290 PetscErrorCode ierr; 1291 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1292 int 
fd; 1293 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1294 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1295 PetscScalar *column_values; 1296 PetscInt message_count,flowcontrolcount; 1297 1298 PetscFunctionBegin; 1299 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1300 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1301 nz = A->nz + B->nz; 1302 if (!rank) { 1303 header[0] = MAT_FILE_CLASSID; 1304 header[1] = mat->rmap->N; 1305 header[2] = mat->cmap->N; 1306 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1307 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1308 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1309 /* get largest number of rows any processor has */ 1310 rlen = mat->rmap->n; 1311 range = mat->rmap->range; 1312 for (i=1; i<size; i++) { 1313 rlen = PetscMax(rlen,range[i+1] - range[i]); 1314 } 1315 } else { 1316 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1317 rlen = mat->rmap->n; 1318 } 1319 1320 /* load up the local row counts */ 1321 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1322 for (i=0; i<mat->rmap->n; i++) { 1323 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1324 } 1325 1326 /* store the row lengths to the file */ 1327 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1328 if (!rank) { 1329 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1330 for (i=1; i<size; i++) { 1331 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1332 rlen = range[i+1] - range[i]; 1333 ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 } 1336 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1337 } else { 1338 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1339 ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1341 } 1342 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1343 1344 /* load up the local column indices */ 1345 nzmax = nz; /* 0th processor needs space as large as the largest processor needs */ 1346 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1347 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1348 cnt = 0; 1349 for (i=0; i<mat->rmap->n; i++) { 1350 for (j=B->i[i]; j<B->i[i+1]; j++) { 1351 if ( (col = garray[B->j[j]]) > cstart) break; 1352 column_indices[cnt++] = col; 1353 } 1354 for (k=A->i[i]; k<A->i[i+1]; k++) { 1355 column_indices[cnt++] = A->j[k] + cstart; 1356 } 1357 for (; j<B->i[i+1]; j++) { 1358 column_indices[cnt++] = garray[B->j[j]]; 1359 } 1360 } 1361 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1362 1363 /* store the column indices to the file */ 1364 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1365 if (!rank) { 1366 MPI_Status status; 1367 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1368 for (i=1; i<size; i++) { 1369 ierr =
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1370 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1371 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1372 ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1373 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 } 1375 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1376 } else { 1377 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1378 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1380 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1381 } 1382 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1383 1384 /* load up the local column values */ 1385 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1386 cnt = 0; 1387 for (i=0; i<mat->rmap->n; i++) { 1388 for (j=B->i[i]; j<B->i[i+1]; j++) { 1389 if ( garray[B->j[j]] > cstart) break; 1390 column_values[cnt++] = B->a[j]; 1391 } 1392 for (k=A->i[i]; k<A->i[i+1]; k++) { 1393 column_values[cnt++] = A->a[k]; 1394 } 1395 for (; j<B->i[i+1]; j++) { 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 } 1399 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1400 1401 /* store the column values to the file */ 1402 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1403 if (!rank) { 1404 MPI_Status status; 1405 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1406 for (i=1; i<size; i++) { 1407 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1408 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1409 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1410 ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1411 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 } 1413 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1414 } else { 1415 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1416 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1418 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1419 } 1420 ierr = PetscFree(column_values);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1427 { 1428 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1429 PetscErrorCode ierr; 1430 PetscMPIInt rank = aij->rank,size = aij->size; 1431 PetscBool isdraw,iascii,isbinary; 1432 PetscViewer sviewer; 1433 PetscViewerFormat format; 1434 1435 PetscFunctionBegin; 1436 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1437 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1438 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1439 if (iascii) { 1440 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1441 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1442 MatInfo info; 1443 PetscBool inodes; 1444 1445 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1446 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1447 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1448 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1449 if (!inodes) { 1450 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1451 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1452 } else { 1453 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1454 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1455 } 1456 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1457 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1458 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1460 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1463 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1466 PetscInt inodecount,inodelimit,*inodes; 1467 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1468 if (inodes) { 1469 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1472 } 1473 PetscFunctionReturn(0); 1474 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1475 PetscFunctionReturn(0); 1476 } 1477 } else if (isbinary) { 1478 if (size == 1) { 1479 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1480 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1481 } else { 1482 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } else if (isdraw) { 1486 PetscDraw draw; 1487 PetscBool isnull; 1488 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1489 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1490 } 1491 1492 if (size == 1) { 1493 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1494 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1495 } else { 1496 /* assemble the entire matrix onto first processor. 
*/ 1497 Mat A; 1498 Mat_SeqAIJ *Aloc; 1499 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1500 MatScalar *a; 1501 1502 if (mat->rmap->N > 1024) { 1503 PetscBool flg = PETSC_FALSE; 1504 1505 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1506 if (!flg) { 1507 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1508 } 1509 } 1510 1511 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1512 if (!rank) { 1513 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1514 } else { 1515 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1516 } 1517 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1518 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1519 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1520 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1521 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1522 1523 /* copy over the A part */ 1524 Aloc = (Mat_SeqAIJ*)aij->A->data; 1525 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1526 row = mat->rmap->rstart; 1527 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1528 for (i=0; i<m; i++) { 1529 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1530 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1531 } 1532 aj = Aloc->j; 1533 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1534 1535 /* copy over the B part */ 1536 Aloc = (Mat_SeqAIJ*)aij->B->data; 1537 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1538 row = mat->rmap->rstart; 1539 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1540 ct = cols; 1541 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1542 for (i=0; i<m; i++) { 1543 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1544 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1545 } 1546 ierr = PetscFree(ct);CHKERRQ(ierr); 1547 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1548 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1549 /* 1550 Everyone has to call to draw the matrix since the graphics waits are 1551 synchronized across all processors that share the PetscDraw object 1552 */ 1553 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1554 if (!rank) { 1555 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1556 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1557 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1558 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1559 } 1560 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1561 ierr = MatDestroy(&A);CHKERRQ(ierr); 1562 } 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatView_MPIAIJ" 1568 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1569 { 1570 PetscErrorCode ierr; 1571 PetscBool iascii,isdraw,issocket,isbinary; 1572 1573 PetscFunctionBegin; 1574 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1575 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1576 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1577 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1578 if (iascii || isdraw || isbinary || issocket) { 1579 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1580 } else { 1581 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatSOR_MPIAIJ" 1588 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1589 { 1590 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1591 PetscErrorCode ierr; 1592 Vec bb1 = 0; 1593 PetscBool hasop; 1594 1595 PetscFunctionBegin; 1596 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1597 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1598 } 1599 1600 if (flag == SOR_APPLY_UPPER) { 1601 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1602 PetscFunctionReturn(0); 1603 } 1604 1605 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1606 if (flag & SOR_ZERO_INITIAL_GUESS) { 1607 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1608 its--; 1609 } 1610 1611 while (its--) { 1612 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 1615 /* update rhs: bb1 = bb - B*x */ 1616 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1617 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1618 1619 /* local sweep */ 1620 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1621 } 1622 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1623 if (flag & SOR_ZERO_INITIAL_GUESS) { 1624 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1625 its--; 1626 } 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_EISENSTAT) { 1655 Vec xx1; 1656 1657 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1658 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1659 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 if (!mat->diag) { 1663 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1664 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1665 } 1666 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1667 if (hasop) { 1668 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1669 } else { 1670 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1671 } 1672 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1673 1674 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1675 1676 /* local sweep */ 1677 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1678 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1679 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1680 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1681 1682 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatPermute_MPIAIJ" 1688 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1689 { 1690 MPI_Comm comm; 1691 PetscInt first,local_rowsize,local_colsize; 1692 const PetscInt *rows; 1693 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1698 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1699 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1700 /* collect the global row permutation and invert it */ 1701 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1702 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1703 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1704 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1705 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1706 /* get the local target indices */ 1707 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1708 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1709 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1710 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1711 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1712 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1713 /* the column permutation is so much easier; 1714 make a local version of 'colp' and invert it */ 1715 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1716 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1717 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1718 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1719 /* now we just get the submatrix */ 1720 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1721 /* clean up */ 1722 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1729 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1730 { 1731 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1732 Mat A = mat->A,B = 
mat->B; 1733 PetscErrorCode ierr; 1734 PetscReal isend[5],irecv[5]; 1735 1736 PetscFunctionBegin; 1737 info->block_size = 1.0; 1738 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1739 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1740 isend[3] = info->memory; isend[4] = info->mallocs; 1741 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1742 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1743 isend[3] += info->memory; isend[4] += info->mallocs; 1744 if (flag == MAT_LOCAL) { 1745 info->nz_used = isend[0]; 1746 info->nz_allocated = isend[1]; 1747 info->nz_unneeded = isend[2]; 1748 info->memory = isend[3]; 1749 info->mallocs = isend[4]; 1750 } else if (flag == MAT_GLOBAL_MAX) { 1751 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1752 info->nz_used = irecv[0]; 1753 info->nz_allocated = irecv[1]; 1754 info->nz_unneeded = irecv[2]; 1755 info->memory = irecv[3]; 1756 info->mallocs = irecv[4]; 1757 } else if (flag == MAT_GLOBAL_SUM) { 1758 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1759 info->nz_used = irecv[0]; 1760 info->nz_allocated = irecv[1]; 1761 info->nz_unneeded = irecv[2]; 1762 info->memory = irecv[3]; 1763 info->mallocs = irecv[4]; 1764 } 1765 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1766 info->fill_ratio_needed = 0; 1767 info->factor_mallocs = 0; 1768 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatSetOption_MPIAIJ" 1774 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1775 { 1776 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1777 PetscErrorCode ierr; 1778 1779 PetscFunctionBegin; 1780 switch (op) { 1781 case MAT_NEW_NONZERO_LOCATIONS: 1782 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1783 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1784 case MAT_KEEP_NONZERO_PATTERN: 1785 case MAT_NEW_NONZERO_LOCATION_ERR: 1786 case MAT_USE_INODES: 1787 case MAT_IGNORE_ZERO_ENTRIES: 1788 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1789 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1790 break; 1791 case MAT_ROW_ORIENTED: 1792 a->roworiented = flg; 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1795 break; 1796 case MAT_NEW_DIAGONALS: 1797 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1798 break; 1799 case MAT_IGNORE_OFF_PROC_ENTRIES: 1800 a->donotstash = flg; 1801 break; 1802 case MAT_SPD: 1803 A->spd_set = PETSC_TRUE; 1804 A->spd = flg; 1805 if (flg) { 1806 A->symmetric = PETSC_TRUE; 1807 A->structurally_symmetric = PETSC_TRUE; 1808 A->symmetric_set = PETSC_TRUE; 1809 A->structurally_symmetric_set = PETSC_TRUE; 1810 } 1811 break; 1812 case MAT_SYMMETRIC: 1813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1814 break; 1815 case MAT_STRUCTURALLY_SYMMETRIC: 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 break; 1818 case MAT_HERMITIAN: 1819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1820 break; 1821 case MAT_SYMMETRY_ETERNAL: 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 break; 1824 default: 1825 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1826 } 1827 PetscFunctionReturn(0); 1828 } 1829 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatGetRow_MPIAIJ" 1832 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1833 { 1834 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1835 
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1836 PetscErrorCode ierr; 1837 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1838 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1839 PetscInt *cmap,*idx_p; 1840 1841 PetscFunctionBegin; 1842 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1843 mat->getrowactive = PETSC_TRUE; 1844 1845 if (!mat->rowvalues && (idx || v)) { 1846 /* 1847 allocate enough space to hold information from the longest row. 1848 */ 1849 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1850 PetscInt max = 1,tmp; 1851 for (i=0; i<matin->rmap->n; i++) { 1852 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1853 if (max < tmp) { max = tmp; } 1854 } 1855 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1856 } 1857 1858 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1859 lrow = row - rstart; 1860 1861 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1862 if (!v) {pvA = 0; pvB = 0;} 1863 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1864 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1865 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1866 nztot = nzA + nzB; 1867 1868 cmap = mat->garray; 1869 if (v || idx) { 1870 if (nztot) { 1871 /* Sort by increasing column numbers, assuming A and B already sorted */ 1872 PetscInt imark = -1; 1873 if (v) { 1874 *v = v_p = mat->rowvalues; 1875 for (i=0; i<nzB; i++) { 1876 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1877 else break; 1878 } 1879 imark = i; 1880 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1881 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1882 } 1883 if (idx) { 1884 *idx = idx_p = mat->rowindices; 1885 if (imark > -1) { 1886 for (i=0; i<imark; i++) { 1887 idx_p[i] = cmap[cworkB[i]]; 1888 } 1889 } else { 1890 for (i=0; i<nzB; i++) { 1891 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1892 else break; 1893 } 1894 imark = i; 1895 } 1896 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1897 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1898 } 1899 } else { 1900 if (idx) *idx = 0; 1901 if (v) *v = 0; 1902 } 1903 } 1904 *nz = nztot; 1905 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1906 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1912 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1913 { 1914 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1915 1916 PetscFunctionBegin; 1917 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1918 aij->getrowactive = PETSC_FALSE; 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatNorm_MPIAIJ" 1924 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1925 { 1926 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1927 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1928 PetscErrorCode ierr; 1929 PetscInt i,j,cstart = mat->cmap->rstart; 1930 PetscReal sum = 0.0; 1931 MatScalar *v; 1932 1933 PetscFunctionBegin; 1934 if (aij->size == 1) { 1935 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1936 } else { 1937 if (type == 
NORM_FROBENIUS) { 1938 v = amat->a; 1939 for (i=0; i<amat->nz; i++) { 1940 #if defined(PETSC_USE_COMPLEX) 1941 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1942 #else 1943 sum += (*v)*(*v); v++; 1944 #endif 1945 } 1946 v = bmat->a; 1947 for (i=0; i<bmat->nz; i++) { 1948 #if defined(PETSC_USE_COMPLEX) 1949 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1950 #else 1951 sum += (*v)*(*v); v++; 1952 #endif 1953 } 1954 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1955 *norm = PetscSqrtReal(*norm); 1956 } else if (type == NORM_1) { /* max column norm */ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1960 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1961 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1962 *norm = 0.0; 1963 v = amat->a; jj = amat->j; 1964 for (j=0; j<amat->nz; j++) { 1965 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1966 } 1967 v = bmat->a; jj = bmat->j; 1968 for (j=0; j<bmat->nz; j++) { 1969 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1970 } 1971 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1972 for (j=0; j<mat->cmap->N; j++) { 1973 if (tmp2[j] > *norm) *norm = tmp2[j]; 1974 } 1975 ierr = PetscFree(tmp);CHKERRQ(ierr); 1976 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1992 } else { 1993 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 } 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatTranspose_MPIAIJ" 2001 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2002 { 2003 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2004 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2005 PetscErrorCode ierr; 2006 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2007 PetscInt cstart=A->cmap->rstart,ncol; 2008 Mat B; 2009 MatScalar *array; 2010 2011 PetscFunctionBegin; 2012 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2013 2014 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2015 ai = Aloc->i; aj = Aloc->j; 2016 bi = Bloc->i; bj = Bloc->j; 2017 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2018 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2019 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2020 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2021 for (i=0; i<ai[ma]; i++){ 2022 d_nnz[aj[i]] ++; 2023 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2024 } 2025 2026 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2027 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2028 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2029 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2030 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2031 } else { 2032 B = *matout; 2033 } 2034 2035 /* copy over the A part */ 2036 array = Aloc->a; 2037 row = A->rmap->rstart; 2038 for (i=0; i<ma; i++) { 2039 ncol = ai[i+1]-ai[i]; 2040 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2041 row++; array += ncol; aj += ncol; 2042 } 2043 aj = Aloc->j; 2044 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2045 2046 /* copy over the B part */ 2047 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2048 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2049 array = Bloc->a; 2050 row = A->rmap->rstart; 2051 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2052 cols_tmp = cols; 2053 for (i=0; i<mb; i++) { 2054 ncol = bi[i+1]-bi[i]; 2055 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2056 row++; array += ncol; cols_tmp += ncol; 2057 } 2058 ierr = PetscFree(cols);CHKERRQ(ierr); 2059 2060 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2061 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2062 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2063 *matout = B; 2064 } else { 2065 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2066 } 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2072 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2073 { 2074 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2075 Mat a = aij->A,b = aij->B; 2076 PetscErrorCode ierr; 2077 PetscInt s1,s2,s3; 2078 2079 PetscFunctionBegin; 2080 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2081 if (rr) { 2082 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2083 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2084 /* Overlap communication with computation. 
*/ 2085 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2086 } 2087 if (ll) { 2088 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2089 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2090 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2091 } 2092 /* scale the diagonal block */ 2093 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2094 2095 if (rr) { 2096 /* Do a scatter end and then right scale the off-diagonal block */ 2097 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2098 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2099 } 2100 2101 PetscFunctionReturn(0); 2102 } 2103 2104 #undef __FUNCT__ 2105 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2106 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2107 { 2108 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2109 PetscErrorCode ierr; 2110 2111 PetscFunctionBegin; 2112 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2113 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2114 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2115 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2116 PetscFunctionReturn(0); 2117 } 2118 #undef __FUNCT__ 2119 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2120 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2121 { 2122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2123 PetscErrorCode ierr; 2124 2125 PetscFunctionBegin; 2126 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2127 PetscFunctionReturn(0); 2128 } 2129 2130 #undef __FUNCT__ 2131 #define __FUNCT__ "MatEqual_MPIAIJ" 2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2133 { 2134 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2135 Mat a,b,c,d; 2136 PetscBool flg; 2137 PetscErrorCode ierr; 2138 2139 PetscFunctionBegin; 2140 a = matA->A; b = matA->B; 2141 c = matB->A; d = matB->B; 2142 2143 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2144 if (flg) { 2145 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2146 } 2147 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatCopy_MPIAIJ" 2153 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2154 { 2155 PetscErrorCode ierr; 2156 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2157 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2158 2159 PetscFunctionBegin; 2160 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2161 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2162 /* because of the column compression in the off-processor part of the matrix a->B, 2163 the number of columns in a->B and b->B may be different, hence we cannot call 2164 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2165 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2166 then copying the submatrices */ 2167 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2168 } else { 2169 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2170 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2171 } 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2177 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2178 { 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "MatAXPY_MPIAIJ" 2188 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2189 { 2190 PetscErrorCode ierr; 2191 PetscInt i; 2192 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2193 PetscBLASInt bnz,one=1; 2194 Mat_SeqAIJ *x,*y; 2195 2196 PetscFunctionBegin; 2197 if (str == SAME_NONZERO_PATTERN) { 2198 PetscScalar alpha = a; 2199 x = (Mat_SeqAIJ *)xx->A->data; 2200 y = (Mat_SeqAIJ *)yy->A->data; 2201 bnz = PetscBLASIntCast(x->nz); 2202 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2203 x = (Mat_SeqAIJ *)xx->B->data; 2204 y = (Mat_SeqAIJ *)yy->B->data; 2205 bnz = PetscBLASIntCast(x->nz); 2206 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2207 } else if (str == SUBSET_NONZERO_PATTERN) { 2208 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2209 2210 x = (Mat_SeqAIJ *)xx->B->data; 2211 y = (Mat_SeqAIJ *)yy->B->data; 2212 if (y->xtoy && y->XtoY != xx->B) { 2213 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2214 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2215 } 2216 if (!y->xtoy) { /* get xtoy */ 2217 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2218 y->XtoY = xx->B; 2219 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2220 } 2221 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2222 } else { 2223 Mat B; 2224 PetscInt *nnz_d,*nnz_o; 2225 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2226 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2227 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2228 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2229 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2230 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2231 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2232 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2233 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2234 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2235 ierr = MatHeaderReplace(Y,B); 2236 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2237 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2238 } 2239 PetscFunctionReturn(0); 2240 } 2241 2242 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2243 2244 #undef __FUNCT__ 2245 #define __FUNCT__ "MatConjugate_MPIAIJ" 2246 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2247 { 2248 #if defined(PETSC_USE_COMPLEX) 2249 PetscErrorCode ierr; 2250 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2251 2252 PetscFunctionBegin; 2253 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2254 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2255 #else 2256 PetscFunctionBegin; 2257 #endif 2258 PetscFunctionReturn(0); 2259 } 2260 2261 
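/*
   Illustrative note (not part of the PETSc sources above): a minimal sketch, under the
   assumption that X and Y are assembled MATMPIAIJ matrices of identical global size, of
   how the three MatStructure cases handled by MatAXPY_MPIAIJ() above are reached:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       fastest path: BLASaxpy_ directly on the A and B parts
     ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);     X's pattern contained in Y's: reuses the cached xtoy map for the B part
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  general case: recomputes preallocation and replaces Y's header

   MatConjugate() dispatches to MatConjugate_MPIAIJ() above, conjugating the diagonal (A)
   and off-diagonal (B) blocks in place; it is a no-op for real-scalar builds of PETSc.
*/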
#undef __FUNCT__ 2262 #define __FUNCT__ "MatRealPart_MPIAIJ" 2263 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2264 { 2265 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2266 PetscErrorCode ierr; 2267 2268 PetscFunctionBegin; 2269 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2270 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2271 PetscFunctionReturn(0); 2272 } 2273 2274 #undef __FUNCT__ 2275 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2276 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2277 { 2278 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2279 PetscErrorCode ierr; 2280 2281 PetscFunctionBegin; 2282 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2283 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2284 PetscFunctionReturn(0); 2285 } 2286 2287 #ifdef PETSC_HAVE_PBGL 2288 2289 #include <boost/parallel/mpi/bsp_process_group.hpp> 2290 #include <boost/graph/distributed/ilu_default_graph.hpp> 2291 #include <boost/graph/distributed/ilu_0_block.hpp> 2292 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2293 #include <boost/graph/distributed/petsc/interface.hpp> 2294 #include <boost/multi_array.hpp> 2295 #include <boost/parallel/distributed_property_map->hpp> 2296 2297 #undef __FUNCT__ 2298 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2299 /* 2300 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2301 */ 2302 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2303 { 2304 namespace petsc = boost::distributed::petsc; 2305 2306 namespace graph_dist = boost::graph::distributed; 2307 using boost::graph::distributed::ilu_default::process_group_type; 2308 using boost::graph::ilu_permuted; 2309 2310 PetscBool row_identity, col_identity; 2311 PetscContainer c; 2312 PetscInt m, n, M, N; 2313 PetscErrorCode ierr; 2314 2315 PetscFunctionBegin; 2316 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2317 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2318 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2319 if (!row_identity || !col_identity) { 2320 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2321 } 2322 2323 process_group_type pg; 2324 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2325 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2326 lgraph_type& level_graph = *lgraph_p; 2327 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2328 2329 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2330 ilu_permuted(level_graph); 2331 2332 /* put together the new matrix */ 2333 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2334 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2335 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2336 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2337 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2338 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2339 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2340 2341 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2342 ierr = PetscContainerSetPointer(c, lgraph_p); 2343 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2344 ierr = PetscContainerDestroy(&c); 2345 PetscFunctionReturn(0); 2346 } 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2350 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const 
MatFactorInfo *info) 2351 { 2352 PetscFunctionBegin; 2353 PetscFunctionReturn(0); 2354 } 2355 2356 #undef __FUNCT__ 2357 #define __FUNCT__ "MatSolve_MPIAIJ" 2358 /* 2359 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2360 */ 2361 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2362 { 2363 namespace graph_dist = boost::graph::distributed; 2364 2365 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2366 lgraph_type* lgraph_p; 2367 PetscContainer c; 2368 PetscErrorCode ierr; 2369 2370 PetscFunctionBegin; 2371 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2372 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2373 ierr = VecCopy(b, x);CHKERRQ(ierr); 2374 2375 PetscScalar* array_x; 2376 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2377 PetscInt sx; 2378 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2379 2380 PetscScalar* array_b; 2381 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2382 PetscInt sb; 2383 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2384 2385 lgraph_type& level_graph = *lgraph_p; 2386 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2387 2388 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2389 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2390 ref_x(array_x, boost::extents[num_vertices(graph)]); 2391 2392 typedef boost::iterator_property_map<array_ref_type::iterator, 2393 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2394 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2395 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2396 2397 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2398 2399 PetscFunctionReturn(0); 2400 } 2401 #endif 2402 2403 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2404 PetscInt nzlocal,nsends,nrecvs; 2405 PetscMPIInt *send_rank,*recv_rank; 2406 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2407 PetscScalar *sbuf_a,**rbuf_a; 2408 PetscErrorCode (*Destroy)(Mat); 2409 } Mat_Redundant; 2410 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2413 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2414 { 2415 PetscErrorCode ierr; 2416 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2417 PetscInt i; 2418 2419 PetscFunctionBegin; 2420 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2421 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2422 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2423 for (i=0; i<redund->nrecvs; i++){ 2424 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2425 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2426 } 2427 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2428 ierr = PetscFree(redund);CHKERRQ(ierr); 2429 PetscFunctionReturn(0); 2430 } 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "MatDestroy_MatRedundant" 2434 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2435 { 2436 PetscErrorCode ierr; 2437 PetscContainer container; 2438 Mat_Redundant *redund=PETSC_NULL; 2439 2440 PetscFunctionBegin; 2441 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2442 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2443 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2444 A->ops->destroy = redund->Destroy; 2445 ierr = 
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2446 if (A->ops->destroy) { 2447 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2448 } 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2454 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2455 { 2456 PetscMPIInt rank,size; 2457 MPI_Comm comm=((PetscObject)mat)->comm; 2458 PetscErrorCode ierr; 2459 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2460 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2461 PetscInt *rowrange=mat->rmap->range; 2462 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2463 Mat A=aij->A,B=aij->B,C=*matredundant; 2464 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2465 PetscScalar *sbuf_a; 2466 PetscInt nzlocal=a->nz+b->nz; 2467 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2468 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2469 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2470 MatScalar *aworkA,*aworkB; 2471 PetscScalar *vals; 2472 PetscMPIInt tag1,tag2,tag3,imdex; 2473 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2474 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2475 MPI_Status recv_status,*send_status; 2476 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2477 PetscInt **rbuf_j=PETSC_NULL; 2478 PetscScalar **rbuf_a=PETSC_NULL; 2479 Mat_Redundant *redund=PETSC_NULL; 2480 PetscContainer container; 2481 2482 PetscFunctionBegin; 2483 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2484 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2485 2486 if (reuse == MAT_REUSE_MATRIX) { 2487 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2488 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2489 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2490 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2491 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2492 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2493 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2494 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2495 2496 nsends = redund->nsends; 2497 nrecvs = redund->nrecvs; 2498 send_rank = redund->send_rank; 2499 recv_rank = redund->recv_rank; 2500 sbuf_nz = redund->sbuf_nz; 2501 rbuf_nz = redund->rbuf_nz; 2502 sbuf_j = redund->sbuf_j; 2503 sbuf_a = redund->sbuf_a; 2504 rbuf_j = redund->rbuf_j; 2505 rbuf_a = redund->rbuf_a; 2506 } 2507 2508 if (reuse == MAT_INITIAL_MATRIX){ 2509 PetscMPIInt subrank,subsize; 2510 PetscInt nleftover,np_subcomm; 2511 /* get the destination processors' id send_rank, nsends and nrecvs */ 2512 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2513 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2514 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2515 np_subcomm = size/nsubcomm; 2516 nleftover = size - nsubcomm*np_subcomm; 2517 nsends = 0; nrecvs = 0; 2518 for (i=0; i<size; i++){ /* i=rank*/ 2519 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2520 send_rank[nsends] = i; nsends++; 2521 recv_rank[nrecvs++] = i; 2522 } 2523 } 2524 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2525 i = size-nleftover-1; 2526 j = 0; 2527 while (j < nsubcomm - nleftover){ 2528 send_rank[nsends++] = i; 2529 i--; j++; 2530 } 2531 } 2532 2533 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2534 for (i=0; i<nleftover; i++){ 2535 recv_rank[nrecvs++] = size-nleftover+i; 2536 } 2537 } 2538 2539 /* allocate sbuf_j, sbuf_a */ 2540 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2541 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2542 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2543 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2544 2545 /* copy mat's local entries into the buffers */ 2546 if (reuse == MAT_INITIAL_MATRIX){ 2547 rownz_max = 0; 2548 rptr = sbuf_j; 2549 cols = sbuf_j + rend-rstart + 1; 2550 vals = sbuf_a; 2551 rptr[0] = 0; 2552 for (i=0; i<rend-rstart; i++){ 2553 row = i + rstart; 2554 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2555 ncols = nzA + nzB; 2556 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2557 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2558 /* load the column indices for this row into cols */ 2559 lwrite = 0; 2560 for (l=0; l<nzB; l++) { 2561 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2562 vals[lwrite] = aworkB[l]; 2563 cols[lwrite++] = ctmp; 2564 } 2565 } 2566 for (l=0; l<nzA; l++){ 2567 vals[lwrite] = aworkA[l]; 2568 cols[lwrite++] = cstart + cworkA[l]; 2569 } 2570 for (l=0; l<nzB; l++) { 2571 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2572 vals[lwrite] = aworkB[l]; 2573 cols[lwrite++] = ctmp; 2574 } 2575 } 2576 vals += ncols; 2577 cols += ncols; 2578 rptr[i+1] = rptr[i] + ncols; 2579 if (rownz_max < ncols) rownz_max = ncols; 2580 } 2581 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2582 } else { /* only copy matrix values into sbuf_a */ 2583 rptr = sbuf_j; 2584 vals = sbuf_a; 2585 rptr[0] = 0; 2586 for (i=0; i<rend-rstart; i++){ 2587 row = i + rstart; 2588 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2589 ncols = nzA + nzB; 2590 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2591 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2592 lwrite = 0; 2593 for (l=0; l<nzB; l++) { 2594 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2595 } 2596 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2597 for 
(l=0; l<nzB; l++) { 2598 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2599 } 2600 vals += ncols; 2601 rptr[i+1] = rptr[i] + ncols; 2602 } 2603 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2604 2605 /* send nzlocal to others, and recv other's nzlocal */ 2606 /*--------------------------------------------------*/ 2607 if (reuse == MAT_INITIAL_MATRIX){ 2608 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2609 s_waits2 = s_waits3 + nsends; 2610 s_waits1 = s_waits2 + nsends; 2611 r_waits1 = s_waits1 + nsends; 2612 r_waits2 = r_waits1 + nrecvs; 2613 r_waits3 = r_waits2 + nrecvs; 2614 } else { 2615 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2616 r_waits3 = s_waits3 + nsends; 2617 } 2618 2619 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2620 if (reuse == MAT_INITIAL_MATRIX){ 2621 /* get new tags to keep the communication clean */ 2622 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2623 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2624 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2625 2626 /* post receives of other's nzlocal */ 2627 for (i=0; i<nrecvs; i++){ 2628 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2629 } 2630 /* send nzlocal to others */ 2631 for (i=0; i<nsends; i++){ 2632 sbuf_nz[i] = nzlocal; 2633 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2634 } 2635 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2636 count = nrecvs; 2637 while (count) { 2638 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2639 recv_rank[imdex] = recv_status.MPI_SOURCE; 2640 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2641 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2642 2643 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2644 rbuf_nz[imdex] += i + 2; 2645 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2646 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2647 count--; 2648 } 2649 /* wait on sends of nzlocal */ 2650 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2651 /* send mat->i,j to others, and recv from other's */ 2652 /*------------------------------------------------*/ 2653 for (i=0; i<nsends; i++){ 2654 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2655 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2656 } 2657 /* wait on receives of mat->i,j */ 2658 /*------------------------------*/ 2659 count = nrecvs; 2660 while (count) { 2661 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2662 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2663 count--; 2664 } 2665 /* wait on sends of mat->i,j */ 2666 /*---------------------------*/ 2667 if (nsends) { 2668 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2669 } 2670 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2671 2672 /* post receives, send and receive mat->a */ 2673 
/*----------------------------------------*/ 2674 for (imdex=0; imdex<nrecvs; imdex++) { 2675 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2676 } 2677 for (i=0; i<nsends; i++){ 2678 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2679 } 2680 count = nrecvs; 2681 while (count) { 2682 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2683 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2684 count--; 2685 } 2686 if (nsends) { 2687 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2688 } 2689 2690 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2691 2692 /* create redundant matrix */ 2693 /*-------------------------*/ 2694 if (reuse == MAT_INITIAL_MATRIX){ 2695 /* compute rownz_max for preallocation */ 2696 for (imdex=0; imdex<nrecvs; imdex++){ 2697 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2698 rptr = rbuf_j[imdex]; 2699 for (i=0; i<j; i++){ 2700 ncols = rptr[i+1] - rptr[i]; 2701 if (rownz_max < ncols) rownz_max = ncols; 2702 } 2703 } 2704 2705 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2706 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2707 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2708 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2709 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2710 } else { 2711 C = *matredundant; 2712 } 2713 2714 /* insert local matrix entries */ 2715 rptr = sbuf_j; 2716 cols = sbuf_j + rend-rstart + 1; 2717 vals = sbuf_a; 2718 for (i=0; i<rend-rstart; i++){ 2719 row = i + rstart; 2720 ncols = rptr[i+1] - rptr[i]; 2721 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2722 vals += ncols; 2723 cols += ncols; 2724 } 2725 /* insert received matrix entries */ 2726 for (imdex=0; imdex<nrecvs; imdex++){ 2727 rstart = rowrange[recv_rank[imdex]]; 2728 rend = rowrange[recv_rank[imdex]+1]; 2729 rptr = rbuf_j[imdex]; 2730 cols = rbuf_j[imdex] + rend-rstart + 1; 2731 vals = rbuf_a[imdex]; 2732 for (i=0; i<rend-rstart; i++){ 2733 row = i + rstart; 2734 ncols = rptr[i+1] - rptr[i]; 2735 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2736 vals += ncols; 2737 cols += ncols; 2738 } 2739 } 2740 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2741 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2742 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2743 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2744 if (reuse == MAT_INITIAL_MATRIX) { 2745 PetscContainer container; 2746 *matredundant = C; 2747 /* create a supporting struct and attach it to C for reuse */ 2748 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2749 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2750 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2751 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2752 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2753 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2754 2755 redund->nzlocal = nzlocal; 2756 redund->nsends = nsends; 2757 redund->nrecvs = nrecvs; 2758 
redund->send_rank = send_rank;
2759     redund->recv_rank = recv_rank;
2760     redund->sbuf_nz = sbuf_nz;
2761     redund->rbuf_nz = rbuf_nz;
2762     redund->sbuf_j = sbuf_j;
2763     redund->sbuf_a = sbuf_a;
2764     redund->rbuf_j = rbuf_j;
2765     redund->rbuf_a = rbuf_a;
2766
2767     redund->Destroy = C->ops->destroy;
2768     C->ops->destroy = MatDestroy_MatRedundant;
2769   }
2770   PetscFunctionReturn(0);
2771 }
2772
2773 #undef __FUNCT__
2774 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2775 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2776 {
2777   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2778   PetscErrorCode ierr;
2779   PetscInt i,*idxb = 0;
2780   PetscScalar *va,*vb;
2781   Vec vtmp;
2782
2783   PetscFunctionBegin;
2784   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2785   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2786   if (idx) {
2787     for (i=0; i<A->rmap->n; i++) {
2788       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; /* convert diagonal-block index to a global column index */
2789     }
2790   }
2791
2792   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2793   if (idx) {
2794     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2795   }
2796   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2797   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2798
2799   for (i=0; i<A->rmap->n; i++){
2800     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2801       va[i] = vb[i];
2802       if (idx) idx[i] = a->garray[idxb[i]]; /* off-diagonal entry wins; map through garray to the global column */
2803     }
2804   }
2805
2806   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2807   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2808   ierr = PetscFree(idxb);CHKERRQ(ierr);
2809   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2810   PetscFunctionReturn(0);
2811 }
2812
2813 #undef __FUNCT__
2814 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2815 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2816 {
2817   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2818   PetscErrorCode ierr;
2819   PetscInt i,*idxb = 0;
2820   PetscScalar *va,*vb;
2821   Vec vtmp;
2822
2823   PetscFunctionBegin;
2824   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2825   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2826   if (idx) {
2827     for (i=0; i<A->rmap->n; i++) { /* loop over local rows, as in MatGetRowMaxAbs_MPIAIJ() */
2828       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2829     }
2830   }
2831
2832   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2833   if (idx) {
2834     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2835   }
2836   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2837   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2838
2839   for (i=0; i<A->rmap->n; i++){
2840     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2841       va[i] = vb[i];
2842       if (idx) idx[i] = a->garray[idxb[i]];
2843     }
2844   }
2845
2846   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2847   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2848   ierr = PetscFree(idxb);CHKERRQ(ierr);
2849   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2850   PetscFunctionReturn(0);
2851 }
2852
2853 #undef __FUNCT__
2854 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2855 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2856 {
2857   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2858   PetscInt n = A->rmap->n;
2859   PetscInt cstart = A->cmap->rstart;
2860   PetscInt *cmap = mat->garray;
2861   PetscInt *diagIdx, *offdiagIdx;
2862   Vec diagV, offdiagV;
2863   PetscScalar *a, *diagA, *offdiagA;
2864   PetscInt r;
2865   PetscErrorCode ierr;
2866
2867   PetscFunctionBegin;
2868   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2869   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2870   ierr = VecCreateSeq(((PetscObject)A)->comm, n,
&offdiagV);CHKERRQ(ierr); 2871 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2872 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2873 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2874 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2875 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2876 for(r = 0; r < n; ++r) { 2877 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2878 a[r] = diagA[r]; 2879 idx[r] = cstart + diagIdx[r]; 2880 } else { 2881 a[r] = offdiagA[r]; 2882 idx[r] = cmap[offdiagIdx[r]]; 2883 } 2884 } 2885 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2886 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2887 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2888 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2889 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2890 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2891 PetscFunctionReturn(0); 2892 } 2893 2894 #undef __FUNCT__ 2895 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2896 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2897 { 2898 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2899 PetscInt n = A->rmap->n; 2900 PetscInt cstart = A->cmap->rstart; 2901 PetscInt *cmap = mat->garray; 2902 PetscInt *diagIdx, *offdiagIdx; 2903 Vec diagV, offdiagV; 2904 PetscScalar *a, *diagA, *offdiagA; 2905 PetscInt r; 2906 PetscErrorCode ierr; 2907 2908 PetscFunctionBegin; 2909 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2910 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2911 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2912 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2913 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2914 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2915 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2916 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2917 for(r = 0; r < n; ++r) { 2918 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2919 a[r] = diagA[r]; 2920 idx[r] = cstart + diagIdx[r]; 2921 } else { 2922 a[r] = offdiagA[r]; 2923 idx[r] = cmap[offdiagIdx[r]]; 2924 } 2925 } 2926 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2928 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2929 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2930 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2931 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2932 PetscFunctionReturn(0); 2933 } 2934 2935 #undef __FUNCT__ 2936 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2937 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2938 { 2939 PetscErrorCode ierr; 2940 Mat *dummy; 2941 2942 PetscFunctionBegin; 2943 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2944 *newmat = *dummy; 2945 ierr = PetscFree(dummy);CHKERRQ(ierr); 2946 PetscFunctionReturn(0); 2947 } 2948 2949 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2950 2951 #undef __FUNCT__ 2952 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2953 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2954 { 2955 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2956 PetscErrorCode ierr; 2957 2958 PetscFunctionBegin; 2959 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2960 PetscFunctionReturn(0); 2961 } 2962 2963 2964 /* -------------------------------------------------------------------*/ 2965 static 
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2966 MatGetRow_MPIAIJ, 2967 MatRestoreRow_MPIAIJ, 2968 MatMult_MPIAIJ, 2969 /* 4*/ MatMultAdd_MPIAIJ, 2970 MatMultTranspose_MPIAIJ, 2971 MatMultTransposeAdd_MPIAIJ, 2972 #ifdef PETSC_HAVE_PBGL 2973 MatSolve_MPIAIJ, 2974 #else 2975 0, 2976 #endif 2977 0, 2978 0, 2979 /*10*/ 0, 2980 0, 2981 0, 2982 MatSOR_MPIAIJ, 2983 MatTranspose_MPIAIJ, 2984 /*15*/ MatGetInfo_MPIAIJ, 2985 MatEqual_MPIAIJ, 2986 MatGetDiagonal_MPIAIJ, 2987 MatDiagonalScale_MPIAIJ, 2988 MatNorm_MPIAIJ, 2989 /*20*/ MatAssemblyBegin_MPIAIJ, 2990 MatAssemblyEnd_MPIAIJ, 2991 MatSetOption_MPIAIJ, 2992 MatZeroEntries_MPIAIJ, 2993 /*24*/ MatZeroRows_MPIAIJ, 2994 0, 2995 #ifdef PETSC_HAVE_PBGL 2996 0, 2997 #else 2998 0, 2999 #endif 3000 0, 3001 0, 3002 /*29*/ MatSetUpPreallocation_MPIAIJ, 3003 #ifdef PETSC_HAVE_PBGL 3004 0, 3005 #else 3006 0, 3007 #endif 3008 0, 3009 0, 3010 0, 3011 /*34*/ MatDuplicate_MPIAIJ, 3012 0, 3013 0, 3014 0, 3015 0, 3016 /*39*/ MatAXPY_MPIAIJ, 3017 MatGetSubMatrices_MPIAIJ, 3018 MatIncreaseOverlap_MPIAIJ, 3019 MatGetValues_MPIAIJ, 3020 MatCopy_MPIAIJ, 3021 /*44*/ MatGetRowMax_MPIAIJ, 3022 MatScale_MPIAIJ, 3023 0, 3024 0, 3025 MatZeroRowsColumns_MPIAIJ, 3026 /*49*/ MatSetBlockSize_MPIAIJ, 3027 0, 3028 0, 3029 0, 3030 0, 3031 /*54*/ MatFDColoringCreate_MPIAIJ, 3032 0, 3033 MatSetUnfactored_MPIAIJ, 3034 MatPermute_MPIAIJ, 3035 0, 3036 /*59*/ MatGetSubMatrix_MPIAIJ, 3037 MatDestroy_MPIAIJ, 3038 MatView_MPIAIJ, 3039 0, 3040 0, 3041 /*64*/ 0, 3042 0, 3043 0, 3044 0, 3045 0, 3046 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3047 MatGetRowMinAbs_MPIAIJ, 3048 0, 3049 MatSetColoring_MPIAIJ, 3050 #if defined(PETSC_HAVE_ADIC) 3051 MatSetValuesAdic_MPIAIJ, 3052 #else 3053 0, 3054 #endif 3055 MatSetValuesAdifor_MPIAIJ, 3056 /*75*/ MatFDColoringApply_AIJ, 3057 0, 3058 0, 3059 0, 3060 0, 3061 /*80*/ 0, 3062 0, 3063 0, 3064 /*83*/ MatLoad_MPIAIJ, 3065 0, 3066 0, 3067 0, 3068 0, 3069 0, 3070 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3071 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3072 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3073 MatPtAP_Basic, 3074 MatPtAPSymbolic_MPIAIJ, 3075 /*94*/ MatPtAPNumeric_MPIAIJ, 3076 0, 3077 0, 3078 0, 3079 0, 3080 /*99*/ 0, 3081 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3082 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3083 MatConjugate_MPIAIJ, 3084 0, 3085 /*104*/MatSetValuesRow_MPIAIJ, 3086 MatRealPart_MPIAIJ, 3087 MatImaginaryPart_MPIAIJ, 3088 0, 3089 0, 3090 /*109*/0, 3091 MatGetRedundantMatrix_MPIAIJ, 3092 MatGetRowMin_MPIAIJ, 3093 0, 3094 0, 3095 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3096 0, 3097 0, 3098 0, 3099 0, 3100 /*119*/0, 3101 0, 3102 0, 3103 0, 3104 MatGetMultiProcBlock_MPIAIJ, 3105 /*124*/MatFindNonZeroRows_MPIAIJ, 3106 MatGetColumnNorms_MPIAIJ, 3107 MatInvertBlockDiagonal_MPIAIJ, 3108 0, 3109 MatGetSubMatricesParallel_MPIAIJ, 3110 /*129*/0, 3111 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3112 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3113 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3114 0, 3115 /*134*/0, 3116 0, 3117 0, 3118 0, 3119 0 3120 }; 3121 3122 /* ----------------------------------------------------------------------------------------*/ 3123 3124 EXTERN_C_BEGIN 3125 #undef __FUNCT__ 3126 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3127 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3128 { 3129 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3130 PetscErrorCode ierr; 3131 3132 PetscFunctionBegin; 3133 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3134 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3135 PetscFunctionReturn(0); 3136 } 3137 EXTERN_C_END 3138 3139 EXTERN_C_BEGIN 3140 #undef __FUNCT__ 3141 
#define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3142 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3143 { 3144 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3145 PetscErrorCode ierr; 3146 3147 PetscFunctionBegin; 3148 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3149 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3150 PetscFunctionReturn(0); 3151 } 3152 EXTERN_C_END 3153 3154 EXTERN_C_BEGIN 3155 #undef __FUNCT__ 3156 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3157 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3158 { 3159 Mat_MPIAIJ *b; 3160 PetscErrorCode ierr; 3161 PetscInt i; 3162 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3163 3164 PetscFunctionBegin; 3165 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3166 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3167 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3168 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3169 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3170 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3171 3172 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3173 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3174 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3175 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3176 if (d_nnz) { 3177 for (i=0; i<B->rmap->n; i++) { 3178 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3179 } 3180 } 3181 if (o_nnz) { 3182 for (i=0; i<B->rmap->n; i++) { 3183 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3184 } 3185 } 3186 b = (Mat_MPIAIJ*)B->data; 3187 3188 if (!B->preallocated) { 3189 /* Explicitly create 2 MATSEQAIJ matrices. */ 3190 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3191 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3192 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3193 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3194 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3195 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3196 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3197 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3198 } 3199 3200 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3201 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3202 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3203 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3204 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3205 B->preallocated = PETSC_TRUE; 3206 PetscFunctionReturn(0); 3207 } 3208 EXTERN_C_END 3209 3210 #undef __FUNCT__ 3211 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3212 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3213 { 3214 Mat mat; 3215 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3216 PetscErrorCode ierr; 3217 3218 PetscFunctionBegin; 3219 *newmat = 0; 3220 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3221 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3222 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3223 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3224 a = (Mat_MPIAIJ*)mat->data; 3225 3226 mat->factortype = matin->factortype; 3227 mat->rmap->bs = matin->rmap->bs; 3228 mat->assembled = PETSC_TRUE; 3229 mat->insertmode = NOT_SET_VALUES; 3230 mat->preallocated = PETSC_TRUE; 3231 3232 a->size = oldmat->size; 3233 a->rank = oldmat->rank; 3234 a->donotstash = oldmat->donotstash; 3235 a->roworiented = oldmat->roworiented; 3236 a->rowindices = 0; 3237 a->rowvalues = 0; 3238 a->getrowactive = PETSC_FALSE; 3239 3240 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3241 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3242 3243 if (oldmat->colmap) { 3244 #if defined (PETSC_USE_CTABLE) 3245 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3246 #else 3247 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3248 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3249 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3250 #endif 3251 } else a->colmap = 0; 3252 if (oldmat->garray) { 3253 PetscInt len; 3254 len = oldmat->B->cmap->n; 3255 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3256 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3257 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3258 } else a->garray = 0; 3259 3260 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3261 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3262 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3263 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3264 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3265 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3266 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3267 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3268 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3269 *newmat = mat; 3270 PetscFunctionReturn(0); 3271 } 3272 3273 3274 3275 #undef __FUNCT__ 3276 #define __FUNCT__ "MatLoad_MPIAIJ" 3277 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3278 { 3279 PetscScalar *vals,*svals; 3280 MPI_Comm comm = ((PetscObject)viewer)->comm; 3281 PetscErrorCode ierr; 3282 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3283 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3284 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3285 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = 
PETSC_NULL,jj,*mycols,*smycols; 3286 PetscInt cend,cstart,n,*rowners,sizesset=1; 3287 int fd; 3288 3289 PetscFunctionBegin; 3290 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3291 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3292 if (!rank) { 3293 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3294 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3295 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3296 } 3297 3298 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3299 3300 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3301 M = header[1]; N = header[2]; 3302 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3303 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3304 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3305 3306 /* If global sizes are set, check if they are consistent with that given in the file */ 3307 if (sizesset) { 3308 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3309 } 3310 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3311 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3312 3313 /* determine ownership of all rows */ 3314 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3315 else m = newMat->rmap->n; /* Set by user */ 3316 3317 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3318 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3319 3320 /* First process needs enough room for process with most rows */ 3321 if (!rank) { 3322 mmax = rowners[1]; 3323 for (i=2; i<size; i++) { 3324 mmax = PetscMax(mmax,rowners[i]); 3325 } 3326 } else mmax = m; 3327 3328 rowners[0] = 0; 3329 for (i=2; i<=size; i++) { 3330 rowners[i] += rowners[i-1]; 3331 } 3332 rstart = rowners[rank]; 3333 rend = rowners[rank+1]; 3334 3335 /* distribute row lengths to all processors */ 3336 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3337 if (!rank) { 3338 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3339 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3340 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3341 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3342 for (j=0; j<m; j++) { 3343 procsnz[0] += ourlens[j]; 3344 } 3345 for (i=1; i<size; i++) { 3346 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3347 /* calculate the number of nonzeros on each processor */ 3348 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3349 procsnz[i] += rowlengths[j]; 3350 } 3351 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3352 } 3353 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3354 } else { 3355 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3356 } 3357 3358 if (!rank) { 3359 /* determine max buffer needed and allocate it */ 3360 maxnz = 0; 3361 for (i=0; i<size; i++) { 3362 maxnz = PetscMax(maxnz,procsnz[i]); 3363 } 3364 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3365 3366 /* read in my part of the matrix column indices */ 3367 
nz = procsnz[0]; 3368 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3369 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3370 3371 /* read in every one elses and ship off */ 3372 for (i=1; i<size; i++) { 3373 nz = procsnz[i]; 3374 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3375 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3376 } 3377 ierr = PetscFree(cols);CHKERRQ(ierr); 3378 } else { 3379 /* determine buffer space needed for message */ 3380 nz = 0; 3381 for (i=0; i<m; i++) { 3382 nz += ourlens[i]; 3383 } 3384 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3385 3386 /* receive message of column indices*/ 3387 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3388 } 3389 3390 /* determine column ownership if matrix is not square */ 3391 if (N != M) { 3392 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3393 else n = newMat->cmap->n; 3394 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3395 cstart = cend - n; 3396 } else { 3397 cstart = rstart; 3398 cend = rend; 3399 n = cend - cstart; 3400 } 3401 3402 /* loop over local rows, determining number of off diagonal entries */ 3403 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3404 jj = 0; 3405 for (i=0; i<m; i++) { 3406 for (j=0; j<ourlens[i]; j++) { 3407 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3408 jj++; 3409 } 3410 } 3411 3412 for (i=0; i<m; i++) { 3413 ourlens[i] -= offlens[i]; 3414 } 3415 if (!sizesset) { 3416 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3417 } 3418 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3419 3420 for (i=0; i<m; i++) { 3421 ourlens[i] += offlens[i]; 3422 } 3423 3424 if (!rank) { 3425 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3426 3427 /* read in my part of the matrix numerical values */ 3428 nz = procsnz[0]; 3429 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3430 3431 /* insert into matrix */ 3432 jj = rstart; 3433 smycols = mycols; 3434 svals = vals; 3435 for (i=0; i<m; i++) { 3436 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3437 smycols += ourlens[i]; 3438 svals += ourlens[i]; 3439 jj++; 3440 } 3441 3442 /* read in other processors and ship out */ 3443 for (i=1; i<size; i++) { 3444 nz = procsnz[i]; 3445 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3446 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3447 } 3448 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3449 } else { 3450 /* receive numeric values */ 3451 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3452 3453 /* receive message of values*/ 3454 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3455 3456 /* insert into matrix */ 3457 jj = rstart; 3458 smycols = mycols; 3459 svals = vals; 3460 for (i=0; i<m; i++) { 3461 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3462 smycols += ourlens[i]; 3463 svals += ourlens[i]; 3464 jj++; 3465 } 3466 } 3467 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3468 ierr = PetscFree(vals);CHKERRQ(ierr); 3469 ierr = PetscFree(mycols);CHKERRQ(ierr); 3470 ierr = PetscFree(rowners);CHKERRQ(ierr); 3471 3472 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3473 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3474 PetscFunctionReturn(0); 3475 } 3476 
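/*
   Illustrative sketch of typical caller-side usage: MatLoad() on a matrix of type
   MATMPIAIJ dispatches to MatLoad_MPIAIJ() above.  The file name "matrix.dat" is
   hypothetical.

     Mat            A;
     PetscViewer    viewer;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/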
3477 #undef __FUNCT__ 3478 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3479 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3480 { 3481 PetscErrorCode ierr; 3482 IS iscol_local; 3483 PetscInt csize; 3484 3485 PetscFunctionBegin; 3486 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3487 if (call == MAT_REUSE_MATRIX) { 3488 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3489 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3490 } else { 3491 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3492 } 3493 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3494 if (call == MAT_INITIAL_MATRIX) { 3495 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3496 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3497 } 3498 PetscFunctionReturn(0); 3499 } 3500 3501 #undef __FUNCT__ 3502 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3503 /* 3504 Not great since it makes two copies of the submatrix, first an SeqAIJ 3505 in local and then by concatenating the local matrices the end result. 3506 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3507 3508 Note: This requires a sequential iscol with all indices. 3509 */ 3510 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3511 { 3512 PetscErrorCode ierr; 3513 PetscMPIInt rank,size; 3514 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3515 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3516 Mat *local,M,Mreuse; 3517 MatScalar *vwork,*aa; 3518 MPI_Comm comm = ((PetscObject)mat)->comm; 3519 Mat_SeqAIJ *aij; 3520 3521 3522 PetscFunctionBegin; 3523 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3524 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3525 3526 if (call == MAT_REUSE_MATRIX) { 3527 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3528 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3529 local = &Mreuse; 3530 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3531 } else { 3532 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3533 Mreuse = *local; 3534 ierr = PetscFree(local);CHKERRQ(ierr); 3535 } 3536 3537 /* 3538 m - number of local rows 3539 n - number of columns (same on all processors) 3540 rstart - first row in new global matrix generated 3541 */ 3542 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3543 if (call == MAT_INITIAL_MATRIX) { 3544 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3545 ii = aij->i; 3546 jj = aij->j; 3547 3548 /* 3549 Determine the number of non-zeros in the diagonal and off-diagonal 3550 portions of the matrix in order to do correct preallocation 3551 */ 3552 3553 /* first get start and end of "diagonal" columns */ 3554 if (csize == PETSC_DECIDE) { 3555 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3556 if (mglobal == n) { /* square matrix */ 3557 nlocal = m; 3558 } else { 3559 nlocal = n/size + ((n % size) > rank); 3560 } 3561 } else { 3562 nlocal = csize; 3563 } 3564 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3565 rstart = rend - nlocal; 3566 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to 
total number of columns %D",rend,n); 3567 3568 /* next, compute all the lengths */ 3569 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3570 olens = dlens + m; 3571 for (i=0; i<m; i++) { 3572 jend = ii[i+1] - ii[i]; 3573 olen = 0; 3574 dlen = 0; 3575 for (j=0; j<jend; j++) { 3576 if (*jj < rstart || *jj >= rend) olen++; 3577 else dlen++; 3578 jj++; 3579 } 3580 olens[i] = olen; 3581 dlens[i] = dlen; 3582 } 3583 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3584 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3585 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3586 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3587 ierr = PetscFree(dlens);CHKERRQ(ierr); 3588 } else { 3589 PetscInt ml,nl; 3590 3591 M = *newmat; 3592 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3593 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3594 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3595 /* 3596 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3597 rather than the slower MatSetValues(). 3598 */ 3599 M->was_assembled = PETSC_TRUE; 3600 M->assembled = PETSC_FALSE; 3601 } 3602 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3603 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3604 ii = aij->i; 3605 jj = aij->j; 3606 aa = aij->a; 3607 for (i=0; i<m; i++) { 3608 row = rstart + i; 3609 nz = ii[i+1] - ii[i]; 3610 cwork = jj; jj += nz; 3611 vwork = aa; aa += nz; 3612 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3613 } 3614 3615 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3616 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3617 *newmat = M; 3618 3619 /* save submatrix used in processor for next request */ 3620 if (call == MAT_INITIAL_MATRIX) { 3621 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3622 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3623 } 3624 3625 PetscFunctionReturn(0); 3626 } 3627 3628 EXTERN_C_BEGIN 3629 #undef __FUNCT__ 3630 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3631 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3632 { 3633 PetscInt m,cstart, cend,j,nnz,i,d; 3634 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3635 const PetscInt *JJ; 3636 PetscScalar *values; 3637 PetscErrorCode ierr; 3638 3639 PetscFunctionBegin; 3640 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3641 3642 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3643 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3644 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3645 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3646 m = B->rmap->n; 3647 cstart = B->cmap->rstart; 3648 cend = B->cmap->rend; 3649 rstart = B->rmap->rstart; 3650 3651 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3652 3653 #if defined(PETSC_USE_DEBUGGING) 3654 for (i=0; i<m; i++) { 3655 nnz = Ii[i+1]- Ii[i]; 3656 JJ = J + Ii[i]; 3657 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3658 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3659 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3660 } 3661 
#endif 3662 3663 for (i=0; i<m; i++) { 3664 nnz = Ii[i+1]- Ii[i]; 3665 JJ = J + Ii[i]; 3666 nnz_max = PetscMax(nnz_max,nnz); 3667 d = 0; 3668 for (j=0; j<nnz; j++) { 3669 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3670 } 3671 d_nnz[i] = d; 3672 o_nnz[i] = nnz - d; 3673 } 3674 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3675 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3676 3677 if (v) values = (PetscScalar*)v; 3678 else { 3679 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3680 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3681 } 3682 3683 for (i=0; i<m; i++) { 3684 ii = i + rstart; 3685 nnz = Ii[i+1]- Ii[i]; 3686 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3687 } 3688 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3689 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3690 3691 if (!v) { 3692 ierr = PetscFree(values);CHKERRQ(ierr); 3693 } 3694 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3695 PetscFunctionReturn(0); 3696 } 3697 EXTERN_C_END 3698 3699 #undef __FUNCT__ 3700 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3701 /*@ 3702 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3703 (the default parallel PETSc format). 3704 3705 Collective on MPI_Comm 3706 3707 Input Parameters: 3708 + B - the matrix 3709 . i - the indices into j for the start of each local row (starts with zero) 3710 . j - the column indices for each local row (starts with zero) 3711 - v - optional values in the matrix 3712 3713 Level: developer 3714 3715 Notes: 3716 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3717 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3718 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3719 3720 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3721 3722 The format which is used for the sparse matrix input, is equivalent to a 3723 row-major ordering.. i.e for the following matrix, the input data expected is 3724 as shown: 3725 3726 1 0 0 3727 2 0 3 P0 3728 ------- 3729 4 5 6 P1 3730 3731 Process0 [P0]: rows_owned=[0,1] 3732 i = {0,1,3} [size = nrow+1 = 2+1] 3733 j = {0,0,2} [size = nz = 6] 3734 v = {1,2,3} [size = nz = 6] 3735 3736 Process1 [P1]: rows_owned=[2] 3737 i = {0,3} [size = nrow+1 = 1+1] 3738 j = {0,1,2} [size = nz = 6] 3739 v = {4,5,6} [size = nz = 6] 3740 3741 .keywords: matrix, aij, compressed row, sparse, parallel 3742 3743 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3744 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3745 @*/ 3746 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3747 { 3748 PetscErrorCode ierr; 3749 3750 PetscFunctionBegin; 3751 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3752 PetscFunctionReturn(0); 3753 } 3754 3755 #undef __FUNCT__ 3756 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3757 /*@C 3758 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3759 (the default parallel PETSc format). 
   For good matrix assembly performance
3760    the user should preallocate the matrix storage by setting the parameters
3761    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3762    performance can be increased by more than a factor of 50.
3763 
3764    Collective on MPI_Comm
3765 
3766    Input Parameters:
3767 +  B - the matrix
3768 .  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3769            (same value is used for all local rows)
3770 .  d_nnz - array containing the number of nonzeros in the various rows of the
3771            DIAGONAL portion of the local submatrix (possibly different for each row)
3772            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3773            The size of this array is equal to the number of local rows, i.e. 'm'.
3774            For matrices that will be factored, you must leave room for (and set)
3775            the diagonal entry even if it is zero.
3776 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3777            submatrix (same value is used for all local rows).
3778 -  o_nnz - array containing the number of nonzeros in the various rows of the
3779            OFF-DIAGONAL portion of the local submatrix (possibly different for
3780            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3781            structure. The size of this array is equal to the number
3782            of local rows, i.e. 'm'.
3783 
3784    If the *_nnz parameter is given then the *_nz parameter is ignored.
3785 
3786    The AIJ format (also called the Yale sparse matrix format or
3787    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3788    storage.  The stored row and column indices begin with zero.
3789    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3790 
3791    The parallel matrix is partitioned such that the first m0 rows belong to
3792    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3793    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3794 
3795    The DIAGONAL portion of the local submatrix of a processor can be defined
3796    as the submatrix which is obtained by extracting the part corresponding to
3797    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3798    first row that belongs to the processor, r2 is the last row belonging to
3799    this processor, and c1-c2 is the range of indices of the local part of a
3800    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3801    common case of a square matrix, the row and column ranges are the same and
3802    the DIAGONAL part is also square.  The remaining portion of the local
3803    submatrix (m x N) constitutes the OFF-DIAGONAL portion.
3804 
3805    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3806 
3807    You can call MatGetInfo() to get information on how effective the preallocation was;
3808    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3809    You can also run with the option -info and look for messages with the string
3810    malloc in them to see if additional memory allocation was needed.
3811 
3812    Example usage:
3813 
3814    Consider the following 8x8 matrix with 34 non-zero values, that is
3815    assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
3816    proc1 owns 3 rows, and proc2 owns 2 rows.
   This division can be shown
3817    as follows:
3818 
3819 .vb
3820             1  2  0  |  0  3  0  |  0  4
3821    Proc0    0  5  6  |  7  0  0  |  8  0
3822             9  0 10  | 11  0  0  | 12  0
3823    -------------------------------------
3824            13  0 14  | 15 16 17  |  0  0
3825    Proc1    0 18  0  | 19 20 21  |  0  0
3826             0  0  0  | 22 23  0  | 24  0
3827    -------------------------------------
3828    Proc2   25 26 27  |  0  0 28  | 29  0
3829            30  0  0  | 31 32 33  |  0 34
3830 .ve
3831 
3832    This can be represented as a collection of submatrices as:
3833 
3834 .vb
3835       A B C
3836       D E F
3837       G H I
3838 .ve
3839 
3840    Where the submatrices A,B,C are owned by proc0, D,E,F are
3841    owned by proc1, G,H,I are owned by proc2.
3842 
3843    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3844    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3845    The 'M','N' parameters are 8,8, and have the same values on all procs.
3846 
3847    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3848    submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
3849    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3850    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3851    part as SeqAIJ matrices.  For example, proc1 will store [E] as a SeqAIJ
3852    matrix, and [DF] as another SeqAIJ matrix.
3853 
3854    When d_nz, o_nz parameters are specified, d_nz storage elements are
3855    allocated for every row of the local diagonal submatrix, and o_nz
3856    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3857    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3858    the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3859    In this case, the values of d_nz,o_nz are:
3860 .vb
3861      proc0 : d_nz = 2, o_nz = 2
3862      proc1 : d_nz = 3, o_nz = 2
3863      proc2 : d_nz = 1, o_nz = 4
3864 .ve
3865    We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
3866    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3867    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3868    34 values.
3869 
3870    When d_nnz, o_nnz parameters are specified, the storage is specified
3871    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3872    In the above case the values for d_nnz,o_nnz are:
3873 .vb
3874      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3875      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3876      proc2: d_nnz = [1,1] and o_nnz = [4,4]
3877 .ve
3878    Here the space allocated is the sum of all of the above values, i.e. 34, and
3879    hence pre-allocation is perfect.
3880 
3881    Level: intermediate
3882 
3883 .keywords: matrix, aij, compressed row, sparse, parallel
3884 
3885 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3886           MPIAIJ, MatGetInfo()
3887 @*/
3888 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3889 {
3890   PetscErrorCode ierr;
3891 
3892   PetscFunctionBegin;
3893   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3894   PetscValidType(B,1);
3895   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 #undef __FUNCT__
3900 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3901 /*@
3902    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain, in standard
3903    CSR format, the local rows.
3904 3905 Collective on MPI_Comm 3906 3907 Input Parameters: 3908 + comm - MPI communicator 3909 . m - number of local rows (Cannot be PETSC_DECIDE) 3910 . n - This value should be the same as the local size used in creating the 3911 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3912 calculated if N is given) For square matrices n is almost always m. 3913 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3914 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3915 . i - row indices 3916 . j - column indices 3917 - a - matrix values 3918 3919 Output Parameter: 3920 . mat - the matrix 3921 3922 Level: intermediate 3923 3924 Notes: 3925 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3926 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3927 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3928 3929 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3930 3931 The format which is used for the sparse matrix input, is equivalent to a 3932 row-major ordering.. i.e for the following matrix, the input data expected is 3933 as shown: 3934 3935 1 0 0 3936 2 0 3 P0 3937 ------- 3938 4 5 6 P1 3939 3940 Process0 [P0]: rows_owned=[0,1] 3941 i = {0,1,3} [size = nrow+1 = 2+1] 3942 j = {0,0,2} [size = nz = 6] 3943 v = {1,2,3} [size = nz = 6] 3944 3945 Process1 [P1]: rows_owned=[2] 3946 i = {0,3} [size = nrow+1 = 1+1] 3947 j = {0,1,2} [size = nz = 6] 3948 v = {4,5,6} [size = nz = 6] 3949 3950 .keywords: matrix, aij, compressed row, sparse, parallel 3951 3952 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3953 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3954 @*/ 3955 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3956 { 3957 PetscErrorCode ierr; 3958 3959 PetscFunctionBegin; 3960 if (i[0]) { 3961 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3962 } 3963 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3964 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3965 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3966 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3967 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3968 PetscFunctionReturn(0); 3969 } 3970 3971 #undef __FUNCT__ 3972 #define __FUNCT__ "MatCreateMPIAIJ" 3973 /*@C 3974 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3975 (the default parallel PETSc format). For good matrix assembly performance 3976 the user should preallocate the matrix storage by setting the parameters 3977 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3978 performance can be increased by more than a factor of 50. 3979 3980 Collective on MPI_Comm 3981 3982 Input Parameters: 3983 + comm - MPI communicator 3984 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3985 This value should be the same as the local size used in creating the 3986 y vector for the matrix-vector product y = Ax. 3987 . n - This value should be the same as the local size used in creating the 3988 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 3989 calculated if N is given) For square matrices n is almost always m. 3990 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3991 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3992 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3993 (same value is used for all local rows) 3994 . d_nnz - array containing the number of nonzeros in the various rows of the 3995 DIAGONAL portion of the local submatrix (possibly different for each row) 3996 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3997 The size of this array is equal to the number of local rows, i.e 'm'. 3998 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3999 submatrix (same value is used for all local rows). 4000 - o_nnz - array containing the number of nonzeros in the various rows of the 4001 OFF-DIAGONAL portion of the local submatrix (possibly different for 4002 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4003 structure. The size of this array is equal to the number 4004 of local rows, i.e 'm'. 4005 4006 Output Parameter: 4007 . A - the matrix 4008 4009 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4010 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4011 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4012 4013 Notes: 4014 If the *_nnz parameter is given then the *_nz parameter is ignored 4015 4016 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4017 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4018 storage requirements for this matrix. 4019 4020 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4021 processor than it must be used on all processors that share the object for 4022 that argument. 4023 4024 The user MUST specify either the local or global matrix dimensions 4025 (possibly both). 4026 4027 The parallel matrix is partitioned across processors such that the 4028 first m0 rows belong to process 0, the next m1 rows belong to 4029 process 1, the next m2 rows belong to process 2 etc.. where 4030 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4031 values corresponding to [m x N] submatrix. 4032 4033 The columns are logically partitioned with the n0 columns belonging 4034 to 0th partition, the next n1 columns belonging to the next 4035 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4036 4037 The DIAGONAL portion of the local submatrix on any given processor 4038 is the submatrix corresponding to the rows and columns m,n 4039 corresponding to the given processor. i.e diagonal matrix on 4040 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4041 etc. The remaining portion of the local submatrix [m x (N-n)] 4042 constitute the OFF-DIAGONAL portion. The example below better 4043 illustrates this concept. 4044 4045 For a square global matrix we define each processor's diagonal portion 4046 to be its local rows and the corresponding columns (a square submatrix); 4047 each processor's off-diagonal portion encompasses the remainder of the 4048 local matrix (a rectangular submatrix). 4049 4050 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4051 4052 When calling this routine with a single process communicator, a matrix of 4053 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4054 type of communicator, use the construction mechanism: 4055 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4056 4057 By default, this format uses inodes (identical nodes) when possible. 4058 We search for consecutive rows with the same nonzero structure, thereby 4059 reusing matrix information to achieve increased efficiency. 4060 4061 Options Database Keys: 4062 + -mat_no_inode - Do not use inodes 4063 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4064 - -mat_aij_oneindex - Internally use indexing starting at 1 4065 rather than 0. Note that when calling MatSetValues(), 4066 the user still MUST index entries starting at 0! 4067 4068 4069 Example usage: 4070 4071 Consider the following 8x8 matrix with 34 non-zero values, that is 4072 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4073 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4074 as follows: 4075 4076 .vb 4077 1 2 0 | 0 3 0 | 0 4 4078 Proc0 0 5 6 | 7 0 0 | 8 0 4079 9 0 10 | 11 0 0 | 12 0 4080 ------------------------------------- 4081 13 0 14 | 15 16 17 | 0 0 4082 Proc1 0 18 0 | 19 20 21 | 0 0 4083 0 0 0 | 22 23 0 | 24 0 4084 ------------------------------------- 4085 Proc2 25 26 27 | 0 0 28 | 29 0 4086 30 0 0 | 31 32 33 | 0 34 4087 .ve 4088 4089 This can be represented as a collection of submatrices as: 4090 4091 .vb 4092 A B C 4093 D E F 4094 G H I 4095 .ve 4096 4097 Where the submatrices A,B,C are owned by proc0, D,E,F are 4098 owned by proc1, G,H,I are owned by proc2. 4099 4100 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4101 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4102 The 'M','N' parameters are 8,8, and have the same values on all procs. 4103 4104 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4105 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4106 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4107 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4108 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4109 matrix, ans [DF] as another SeqAIJ matrix. 4110 4111 When d_nz, o_nz parameters are specified, d_nz storage elements are 4112 allocated for every row of the local diagonal submatrix, and o_nz 4113 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4114 One way to choose d_nz and o_nz is to use the max nonzerors per local 4115 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4116 In this case, the values of d_nz,o_nz are: 4117 .vb 4118 proc0 : dnz = 2, o_nz = 2 4119 proc1 : dnz = 3, o_nz = 2 4120 proc2 : dnz = 1, o_nz = 4 4121 .ve 4122 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4123 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4124 for proc3. i.e we are using 12+15+10=37 storage locations to store 4125 34 values. 4126 4127 When d_nnz, o_nnz parameters are specified, the storage is specified 4128 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4129 In the above case the values for d_nnz,o_nnz are: 4130 .vb 4131 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4132 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4133 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4134 .ve 4135 Here the space allocated is sum of all the above values i.e 34, and 4136 hence pre-allocation is perfect. 
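   As an illustrative sketch, the corresponding call on proc0 for the example above,
   using the per-row counts (and assuming Mat A and PetscErrorCode ierr are declared
   by the caller), could look like:
.vb
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   with the analogous local sizes and count arrays on proc1 and proc2.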
4137 4138 Level: intermediate 4139 4140 .keywords: matrix, aij, compressed row, sparse, parallel 4141 4142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4143 MPIAIJ, MatCreateMPIAIJWithArrays() 4144 @*/ 4145 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4146 { 4147 PetscErrorCode ierr; 4148 PetscMPIInt size; 4149 4150 PetscFunctionBegin; 4151 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4152 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4153 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4154 if (size > 1) { 4155 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4156 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4157 } else { 4158 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4159 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4160 } 4161 PetscFunctionReturn(0); 4162 } 4163 4164 #undef __FUNCT__ 4165 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4166 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4167 { 4168 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4169 4170 PetscFunctionBegin; 4171 *Ad = a->A; 4172 *Ao = a->B; 4173 *colmap = a->garray; 4174 PetscFunctionReturn(0); 4175 } 4176 4177 #undef __FUNCT__ 4178 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4179 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4180 { 4181 PetscErrorCode ierr; 4182 PetscInt i; 4183 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4184 4185 PetscFunctionBegin; 4186 if (coloring->ctype == IS_COLORING_GLOBAL) { 4187 ISColoringValue *allcolors,*colors; 4188 ISColoring ocoloring; 4189 4190 /* set coloring for diagonal portion */ 4191 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4192 4193 /* set coloring for off-diagonal portion */ 4194 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4195 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4196 for (i=0; i<a->B->cmap->n; i++) { 4197 colors[i] = allcolors[a->garray[i]]; 4198 } 4199 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4200 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4201 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4202 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4203 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4204 ISColoringValue *colors; 4205 PetscInt *larray; 4206 ISColoring ocoloring; 4207 4208 /* set coloring for diagonal portion */ 4209 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4210 for (i=0; i<a->A->cmap->n; i++) { 4211 larray[i] = i + A->cmap->rstart; 4212 } 4213 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4214 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4215 for (i=0; i<a->A->cmap->n; i++) { 4216 colors[i] = coloring->colors[larray[i]]; 4217 } 4218 ierr = PetscFree(larray);CHKERRQ(ierr); 4219 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4220 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4221 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4222 4223 /* set coloring for off-diagonal portion */ 4224 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4225 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4226 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4227 for (i=0; i<a->B->cmap->n; i++) { 4228 colors[i] = coloring->colors[larray[i]]; 4229 } 4230 ierr = PetscFree(larray);CHKERRQ(ierr); 4231 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4232 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4233 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4234 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4235 4236 PetscFunctionReturn(0); 4237 } 4238 4239 #if defined(PETSC_HAVE_ADIC) 4240 #undef __FUNCT__ 4241 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4242 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4243 { 4244 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4245 PetscErrorCode ierr; 4246 4247 PetscFunctionBegin; 4248 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4249 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4250 PetscFunctionReturn(0); 4251 } 4252 #endif 4253 4254 #undef __FUNCT__ 4255 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4256 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4257 { 4258 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4259 PetscErrorCode ierr; 4260 4261 PetscFunctionBegin; 4262 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4263 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4264 PetscFunctionReturn(0); 4265 } 4266 4267 #undef __FUNCT__ 4268 #define __FUNCT__ "MatMergeSymbolic" 4269 PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4270 { 4271 PetscErrorCode ierr; 4272 PetscInt m,N,i,rstart,nnz,*dnz,*onz; 4273 PetscInt *indx; 4274 4275 PetscFunctionBegin; 4276 /* This routine will ONLY return MPIAIJ type matrix */ 4277 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4278 if (n == PETSC_DECIDE){ 4279 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4280 } 4281 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4282 rstart -= m; 4283 4284 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4285 for (i=0;i<m;i++) { 4286 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4287 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4288 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4289 } 4290 4291 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4292 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4293 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4294 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4295 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4296 PetscFunctionReturn(0); 4297 } 4298 4299 #undef __FUNCT__ 4300 #define __FUNCT__ "MatMergeNumeric" 4301 PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4302 { 4303 PetscErrorCode ierr; 4304 PetscInt m,N,i,rstart,nnz,Ii; 4305 PetscInt *indx; 4306 PetscScalar *values; 4307 4308 PetscFunctionBegin; 4309 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4310 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4311 for (i=0;i<m;i++) { 4312 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4313 Ii = i + rstart; 4314 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4315 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4316 } 4317 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4318 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4319 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4320 PetscFunctionReturn(0); 4321 } 4322 4323 #undef __FUNCT__ 4324 #define __FUNCT__ "MatMerge" 4325 /*@ 4326 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4327 matrices from each processor 4328 4329 Collective on MPI_Comm 4330 4331 Input Parameters: 4332 + comm - the communicators the parallel matrix will live on 4333 . inmat - the input sequential matrices 4334 . n - number of local columns (or PETSC_DECIDE) 4335 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4336 4337 Output Parameter: 4338 . outmat - the parallel matrix generated 4339 4340 Level: advanced 4341 4342 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4343 4344 @*/ 4345 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4346 { 4347 PetscErrorCode ierr; 4348 4349 PetscFunctionBegin; 4350 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4351 if (scall == MAT_INITIAL_MATRIX){ 4352 ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4353 } 4354 ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4355 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4356 PetscFunctionReturn(0); 4357 } 4358 4359 #undef __FUNCT__ 4360 #define __FUNCT__ "MatFileSplit" 4361 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4362 { 4363 PetscErrorCode ierr; 4364 PetscMPIInt rank; 4365 PetscInt m,N,i,rstart,nnz; 4366 size_t len; 4367 const PetscInt *indx; 4368 PetscViewer out; 4369 char *name; 4370 Mat B; 4371 const PetscScalar *values; 4372 4373 PetscFunctionBegin; 4374 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4375 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4376 /* Should this be the type of the diagonal block of A? 
*/ 4377 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4378 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4379 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4380 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4381 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4382 for (i=0;i<m;i++) { 4383 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4384 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4385 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4386 } 4387 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4388 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4389 4390 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4391 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4392 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4393 sprintf(name,"%s.%d",outfile,rank); 4394 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4395 ierr = PetscFree(name); 4396 ierr = MatView(B,out);CHKERRQ(ierr); 4397 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4398 ierr = MatDestroy(&B);CHKERRQ(ierr); 4399 PetscFunctionReturn(0); 4400 } 4401 4402 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4403 #undef __FUNCT__ 4404 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4405 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4406 { 4407 PetscErrorCode ierr; 4408 Mat_Merge_SeqsToMPI *merge; 4409 PetscContainer container; 4410 4411 PetscFunctionBegin; 4412 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4413 if (container) { 4414 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4415 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4416 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4417 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4418 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4419 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4420 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4421 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4423 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4425 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4426 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4427 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4428 ierr = PetscFree(merge);CHKERRQ(ierr); 4429 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4430 } 4431 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4432 PetscFunctionReturn(0); 4433 } 4434 4435 #include <../src/mat/utils/freespace.h> 4436 #include <petscbt.h> 4437 4438 #undef __FUNCT__ 4439 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4440 /*@C 4441 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4442 matrices from each processor 4443 4444 Collective on MPI_Comm 4445 4446 Input Parameters: 4447 + comm - the communicators the parallel matrix will live on 4448 . seqmat - the input sequential matrices 4449 . m - number of local rows (or PETSC_DECIDE) 4450 . n - number of local columns (or PETSC_DECIDE) 4451 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4452 4453 Output Parameter: 4454 . mpimat - the parallel matrix generated 4455 4456 Level: advanced 4457 4458 Notes: 4459 The dimensions of the sequential matrix in each processor MUST be the same. 4460 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4461 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 4462 @*/ 4463 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4464 { 4465 PetscErrorCode ierr; 4466 MPI_Comm comm=((PetscObject)mpimat)->comm; 4467 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4468 PetscMPIInt size,rank,taga,*len_s; 4469 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4470 PetscInt proc,m; 4471 PetscInt **buf_ri,**buf_rj; 4472 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4473 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4474 MPI_Request *s_waits,*r_waits; 4475 MPI_Status *status; 4476 MatScalar *aa=a->a; 4477 MatScalar **abuf_r,*ba_i; 4478 Mat_Merge_SeqsToMPI *merge; 4479 PetscContainer container; 4480 4481 PetscFunctionBegin; 4482 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4483 4484 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4485 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4486 4487 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4488 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4489 4490 bi = merge->bi; 4491 bj = merge->bj; 4492 buf_ri = merge->buf_ri; 4493 buf_rj = merge->buf_rj; 4494 4495 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4496 owners = merge->rowmap->range; 4497 len_s = merge->len_s; 4498 4499 /* send and recv matrix values */ 4500 /*-----------------------------*/ 4501 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4502 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4503 4504 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4505 for (proc=0,k=0; proc<size; proc++){ 4506 if (!len_s[proc]) continue; 4507 i = owners[proc]; 4508 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4509 k++; 4510 } 4511 4512 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4513 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4514 ierr = PetscFree(status);CHKERRQ(ierr); 4515 4516 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4517 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4518 4519 /* insert mat values of mpimat */ 4520 /*----------------------------*/ 4521 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4522 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4523 4524 for (k=0; k<merge->nrecv; k++){ 4525 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4526 nrows = *(buf_ri_k[k]); 4527 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4528 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4529 } 4530 4531 /* set values of ba */ 4532 m = merge->rowmap->n; 4533 for (i=0; i<m; i++) { 4534 arow = owners[rank] + i; 4535 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4536 bnzi = bi[i+1] - bi[i]; 4537 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4538 4539 /* add local non-zero vals of this proc's seqmat into ba */ 4540 anzi = ai[arow+1] - ai[arow]; 4541 aj = a->j + ai[arow]; 4542 aa = a->a + ai[arow]; 4543 nextaj = 0; 4544 for (j=0; nextaj<anzi; j++){ 4545 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4546 ba_i[j] += aa[nextaj++]; 4547 } 4548 } 4549 4550 /* add received vals into ba */ 4551 for (k=0; k<merge->nrecv; k++){ /* k-th 
received message */ 4552 /* i-th row */ 4553 if (i == *nextrow[k]) { 4554 anzi = *(nextai[k]+1) - *nextai[k]; 4555 aj = buf_rj[k] + *(nextai[k]); 4556 aa = abuf_r[k] + *(nextai[k]); 4557 nextaj = 0; 4558 for (j=0; nextaj<anzi; j++){ 4559 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4560 ba_i[j] += aa[nextaj++]; 4561 } 4562 } 4563 nextrow[k]++; nextai[k]++; 4564 } 4565 } 4566 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4567 } 4568 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4569 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4570 4571 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4572 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4573 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4574 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4575 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4576 PetscFunctionReturn(0); 4577 } 4578 4579 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4580 4581 #undef __FUNCT__ 4582 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4583 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4584 { 4585 PetscErrorCode ierr; 4586 Mat B_mpi; 4587 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4588 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4589 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4590 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4591 PetscInt len,proc,*dnz,*onz; 4592 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4593 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4594 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4595 MPI_Status *status; 4596 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4597 PetscBT lnkbt; 4598 Mat_Merge_SeqsToMPI *merge; 4599 PetscContainer container; 4600 4601 PetscFunctionBegin; 4602 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4603 4604 /* make sure it is a PETSc comm */ 4605 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4606 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4607 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4608 4609 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4610 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4611 4612 /* determine row ownership */ 4613 /*---------------------------------------------------------*/ 4614 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4615 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4616 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4617 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4618 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4619 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4620 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4621 4622 m = merge->rowmap->n; 4623 M = merge->rowmap->N; 4624 owners = merge->rowmap->range; 4625 4626 /* determine the number of messages to send, their lengths */ 4627 /*---------------------------------------------------------*/ 4628 len_s = merge->len_s; 4629 4630 len = 0; /* length of buf_si[] */ 4631 merge->nsend = 0; 4632 for (proc=0; proc<size; proc++){ 4633 len_si[proc] = 0; 4634 if (proc == rank){ 4635 len_s[proc] = 0; 4636 } else { 4637 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4638 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4639 } 4640 if (len_s[proc]) { 4641 
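/* another message will be sent to [proc]: count it and size its i-structure, which carries
          nrows (the number of nonempty rows destined for [proc]), those row indices relative
          to [proc]'s range, and nrows+1 row offsets, i.e. 2*(nrows+1) integers */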
merge->nsend++; 4642 nrows = 0; 4643 for (i=owners[proc]; i<owners[proc+1]; i++){ 4644 if (ai[i+1] > ai[i]) nrows++; 4645 } 4646 len_si[proc] = 2*(nrows+1); 4647 len += len_si[proc]; 4648 } 4649 } 4650 4651 /* determine the number and length of messages to receive for ij-structure */ 4652 /*-------------------------------------------------------------------------*/ 4653 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4654 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4655 4656 /* post the Irecv of j-structure */ 4657 /*-------------------------------*/ 4658 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4659 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4660 4661 /* post the Isend of j-structure */ 4662 /*--------------------------------*/ 4663 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4664 4665 for (proc=0, k=0; proc<size; proc++){ 4666 if (!len_s[proc]) continue; 4667 i = owners[proc]; 4668 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4669 k++; 4670 } 4671 4672 /* receives and sends of j-structure are complete */ 4673 /*------------------------------------------------*/ 4674 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4675 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4676 4677 /* send and recv i-structure */ 4678 /*---------------------------*/ 4679 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4680 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4681 4682 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4683 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4684 for (proc=0,k=0; proc<size; proc++){ 4685 if (!len_s[proc]) continue; 4686 /* form outgoing message for i-structure: 4687 buf_si[0]: nrows to be sent 4688 [1:nrows]: row index (global) 4689 [nrows+1:2*nrows+1]: i-structure index 4690 */ 4691 /*-------------------------------------------*/ 4692 nrows = len_si[proc]/2 - 1; 4693 buf_si_i = buf_si + nrows+1; 4694 buf_si[0] = nrows; 4695 buf_si_i[0] = 0; 4696 nrows = 0; 4697 for (i=owners[proc]; i<owners[proc+1]; i++){ 4698 anzi = ai[i+1] - ai[i]; 4699 if (anzi) { 4700 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4701 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4702 nrows++; 4703 } 4704 } 4705 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4706 k++; 4707 buf_si += len_si[proc]; 4708 } 4709 4710 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4711 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4712 4713 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4714 for (i=0; i<merge->nrecv; i++){ 4715 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4716 } 4717 4718 ierr = PetscFree(len_si);CHKERRQ(ierr); 4719 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4720 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4721 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4722 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4723 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4724 ierr = 
PetscFree(status);CHKERRQ(ierr); 4725 4726 /* compute a local seq matrix in each processor */ 4727 /*----------------------------------------------*/ 4728 /* allocate bi array and free space for accumulating nonzero column info */ 4729 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4730 bi[0] = 0; 4731 4732 /* create and initialize a linked list */ 4733 nlnk = N+1; 4734 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4735 4736 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4737 len = 0; 4738 len = ai[owners[rank+1]] - ai[owners[rank]]; 4739 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4740 current_space = free_space; 4741 4742 /* determine symbolic info for each local row */ 4743 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4744 4745 for (k=0; k<merge->nrecv; k++){ 4746 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4747 nrows = *buf_ri_k[k]; 4748 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4749 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4750 } 4751 4752 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4753 len = 0; 4754 for (i=0;i<m;i++) { 4755 bnzi = 0; 4756 /* add local non-zero cols of this proc's seqmat into lnk */ 4757 arow = owners[rank] + i; 4758 anzi = ai[arow+1] - ai[arow]; 4759 aj = a->j + ai[arow]; 4760 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4761 bnzi += nlnk; 4762 /* add received col data into lnk */ 4763 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4764 if (i == *nextrow[k]) { /* i-th row */ 4765 anzi = *(nextai[k]+1) - *nextai[k]; 4766 aj = buf_rj[k] + *nextai[k]; 4767 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4768 bnzi += nlnk; 4769 nextrow[k]++; nextai[k]++; 4770 } 4771 } 4772 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4773 4774 /* if free space is not available, make more free space */ 4775 if (current_space->local_remaining<bnzi) { 4776 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4777 nspacedouble++; 4778 } 4779 /* copy data into free space, then initialize lnk */ 4780 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4781 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4782 4783 current_space->array += bnzi; 4784 current_space->local_used += bnzi; 4785 current_space->local_remaining -= bnzi; 4786 4787 bi[i+1] = bi[i] + bnzi; 4788 } 4789 4790 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4791 4792 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4793 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4794 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4795 4796 /* create symbolic parallel matrix B_mpi */ 4797 /*---------------------------------------*/ 4798 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4799 if (n==PETSC_DECIDE) { 4800 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4801 } else { 4802 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4803 } 4804 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4805 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4806 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4807 4808 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4809 
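/* flag the matrix as unassembled until the numeric phase fills in the values, and override
     the destroy routine so the merge data attached below is freed (by MatDestroy_MPIAIJ_SeqsToMPI())
     together with the matrix */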
B_mpi->assembled = PETSC_FALSE; 4810 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4811 merge->bi = bi; 4812 merge->bj = bj; 4813 merge->buf_ri = buf_ri; 4814 merge->buf_rj = buf_rj; 4815 merge->coi = PETSC_NULL; 4816 merge->coj = PETSC_NULL; 4817 merge->owners_co = PETSC_NULL; 4818 4819 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4820 4821 /* attach the supporting struct to B_mpi for reuse */ 4822 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4823 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4824 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4825 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4826 *mpimat = B_mpi; 4827 4828 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4829 PetscFunctionReturn(0); 4830 } 4831 4832 #undef __FUNCT__ 4833 #define __FUNCT__ "MatMerge_SeqsToMPI" 4834 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4835 { 4836 PetscErrorCode ierr; 4837 4838 PetscFunctionBegin; 4839 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4840 if (scall == MAT_INITIAL_MATRIX){ 4841 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4842 } 4843 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4844 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4845 PetscFunctionReturn(0); 4846 } 4847 4848 #undef __FUNCT__ 4849 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4850 /*@ 4851 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4852 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4853 with MatGetSize() 4854 4855 Not Collective 4856 4857 Input Parameters: 4858 + A - the matrix 4859 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4860 4861 Output Parameter: 4862 . 
A_loc - the local sequential matrix generated 4863 4864 Level: developer 4865 4866 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4867 4868 @*/ 4869 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4870 { 4871 PetscErrorCode ierr; 4872 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4873 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4874 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4875 MatScalar *aa=a->a,*ba=b->a,*cam; 4876 PetscScalar *ca; 4877 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4878 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4879 PetscBool match; 4880 4881 PetscFunctionBegin; 4882 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4883 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4884 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4885 if (scall == MAT_INITIAL_MATRIX){ 4886 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4887 ci[0] = 0; 4888 for (i=0; i<am; i++){ 4889 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4890 } 4891 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4892 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4893 k = 0; 4894 for (i=0; i<am; i++) { 4895 ncols_o = bi[i+1] - bi[i]; 4896 ncols_d = ai[i+1] - ai[i]; 4897 /* off-diagonal portion of A */ 4898 for (jo=0; jo<ncols_o; jo++) { 4899 col = cmap[*bj]; 4900 if (col >= cstart) break; 4901 cj[k] = col; bj++; 4902 ca[k++] = *ba++; 4903 } 4904 /* diagonal portion of A */ 4905 for (j=0; j<ncols_d; j++) { 4906 cj[k] = cstart + *aj++; 4907 ca[k++] = *aa++; 4908 } 4909 /* off-diagonal portion of A */ 4910 for (j=jo; j<ncols_o; j++) { 4911 cj[k] = cmap[*bj++]; 4912 ca[k++] = *ba++; 4913 } 4914 } 4915 /* put together the new matrix */ 4916 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4917 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4918 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4919 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4920 mat->free_a = PETSC_TRUE; 4921 mat->free_ij = PETSC_TRUE; 4922 mat->nonew = 0; 4923 } else if (scall == MAT_REUSE_MATRIX){ 4924 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4925 ci = mat->i; cj = mat->j; cam = mat->a; 4926 for (i=0; i<am; i++) { 4927 /* off-diagonal portion of A */ 4928 ncols_o = bi[i+1] - bi[i]; 4929 for (jo=0; jo<ncols_o; jo++) { 4930 col = cmap[*bj]; 4931 if (col >= cstart) break; 4932 *cam++ = *ba++; bj++; 4933 } 4934 /* diagonal portion of A */ 4935 ncols_d = ai[i+1] - ai[i]; 4936 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4937 /* off-diagonal portion of A */ 4938 for (j=jo; j<ncols_o; j++) { 4939 *cam++ = *ba++; bj++; 4940 } 4941 } 4942 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4943 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4944 PetscFunctionReturn(0); 4945 } 4946 4947 #undef __FUNCT__ 4948 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4949 /*@C 4950 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4951 4952 Not Collective 4953 4954 Input Parameters: 4955 + A - the matrix 4956 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4957 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4958 4959 Output Parameter: 4960 . 
A_loc - the local sequential matrix generated 4961 4962 Level: developer 4963 4964 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4965 4966 @*/ 4967 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4968 { 4969 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4970 PetscErrorCode ierr; 4971 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4972 IS isrowa,iscola; 4973 Mat *aloc; 4974 PetscBool match; 4975 4976 PetscFunctionBegin; 4977 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4978 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4979 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4980 if (!row){ 4981 start = A->rmap->rstart; end = A->rmap->rend; 4982 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4983 } else { 4984 isrowa = *row; 4985 } 4986 if (!col){ 4987 start = A->cmap->rstart; 4988 cmap = a->garray; 4989 nzA = a->A->cmap->n; 4990 nzB = a->B->cmap->n; 4991 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4992 ncols = 0; 4993 for (i=0; i<nzB; i++) { 4994 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4995 else break; 4996 } 4997 imark = i; 4998 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4999 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5000 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5001 } else { 5002 iscola = *col; 5003 } 5004 if (scall != MAT_INITIAL_MATRIX){ 5005 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5006 aloc[0] = *A_loc; 5007 } 5008 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5009 *A_loc = aloc[0]; 5010 ierr = PetscFree(aloc);CHKERRQ(ierr); 5011 if (!row){ 5012 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5013 } 5014 if (!col){ 5015 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5016 } 5017 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5018 PetscFunctionReturn(0); 5019 } 5020 5021 #undef __FUNCT__ 5022 #define __FUNCT__ "MatGetBrowsOfAcols" 5023 /*@C 5024 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5025 5026 Collective on Mat 5027 5028 Input Parameters: 5029 + A,B - the matrices in mpiaij format 5030 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5031 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5032 5033 Output Parameter: 5034 + rowb, colb - index sets of rows and columns of B to extract 5035 - B_seq - the sequential matrix generated 5036 5037 Level: developer 5038 5039 @*/ 5040 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5041 { 5042 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5043 PetscErrorCode ierr; 5044 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5045 IS isrowb,iscolb; 5046 Mat *bseq=PETSC_NULL; 5047 5048 PetscFunctionBegin; 5049 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5050 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5051 } 5052 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5053 5054 if (scall == MAT_INITIAL_MATRIX){ 5055 start = A->cmap->rstart; 5056 cmap = a->garray; 5057 nzA = a->A->cmap->n; 5058 nzB = a->B->cmap->n; 5059 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5060 ncols = 0; 5061 for (i=0; i<nzB; i++) { /* row < local row index */ 5062 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5063 else break; 5064 } 5065 imark = i; 5066 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5067 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5068 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5069 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5070 } else { 5071 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5072 isrowb = *rowb; iscolb = *colb; 5073 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5074 bseq[0] = *B_seq; 5075 } 5076 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5077 *B_seq = bseq[0]; 5078 ierr = PetscFree(bseq);CHKERRQ(ierr); 5079 if (!rowb){ 5080 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5081 } else { 5082 *rowb = isrowb; 5083 } 5084 if (!colb){ 5085 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5086 } else { 5087 *colb = iscolb; 5088 } 5089 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5090 PetscFunctionReturn(0); 5091 } 5092 5093 #undef __FUNCT__ 5094 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5095 /* 5096 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5097 of the OFF-DIAGONAL portion of local A 5098 5099 Collective on Mat 5100 5101 Input Parameters: 5102 + A,B - the matrices in mpiaij format 5103 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5104 5105 Output Parameter: 5106 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5107 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5108 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5109 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5110 5111 Level: developer 5112 5113 */ 5114 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5115 { 5116 VecScatter_MPI_General *gen_to,*gen_from; 5117 PetscErrorCode ierr; 5118 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5119 Mat_SeqAIJ *b_oth; 5120 VecScatter ctx=a->Mvctx; 5121 MPI_Comm comm=((PetscObject)ctx)->comm; 5122 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5123 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5124 PetscScalar *rvalues,*svalues; 5125 MatScalar *b_otha,*bufa,*bufA; 5126 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5127 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5128 MPI_Status *sstatus,rstatus; 5129 PetscMPIInt jj; 5130 PetscInt *cols,sbs,rbs; 5131 PetscScalar *vals; 5132 5133 PetscFunctionBegin; 5134 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5135 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5136 } 5137 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5138 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5139 5140 gen_to = (VecScatter_MPI_General*)ctx->todata; 5141 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5142 rvalues = gen_from->values; /* holds the length of receiving row */ 5143 svalues = gen_to->values; /* holds the length of sending row */ 5144 nrecvs = gen_from->n; 5145 nsends = gen_to->n; 5146 5147 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5148 srow = gen_to->indices; /* local row index to be sent */ 5149 sstarts = gen_to->starts; 5150 sprocs = gen_to->procs; 5151 sstatus = gen_to->sstatus; 5152 sbs = gen_to->bs; 5153 rstarts = gen_from->starts; 5154 rprocs = gen_from->procs; 5155 rbs = gen_from->bs; 5156 5157 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5158 if (scall == MAT_INITIAL_MATRIX){ 5159 /* i-array */ 5160 /*---------*/ 5161 /* post receives */ 5162 for (i=0; i<nrecvs; i++){ 5163 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5164 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5165 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5166 } 5167 5168 /* pack the outgoing message */ 5169 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5170 sstartsj[0] = 0; rstartsj[0] = 0; 5171 len = 0; /* total length of j or a array to be sent */ 5172 k = 0; 5173 for (i=0; i<nsends; i++){ 5174 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5175 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5176 for (j=0; j<nrows; j++) { 5177 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5178 for (l=0; l<sbs; l++){ 5179 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5180 rowlen[j*sbs+l] = ncols; 5181 len += ncols; 5182 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5183 } 5184 k++; 5185 } 5186 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5187 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5188 } 5189 /* 
recvs and sends of i-array are completed */ 5190 i = nrecvs; 5191 while (i--) { 5192 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5193 } 5194 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5195 5196 /* allocate buffers for sending j and a arrays */ 5197 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5198 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5199 5200 /* create i-array of B_oth */ 5201 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5202 b_othi[0] = 0; 5203 len = 0; /* total length of j or a array to be received */ 5204 k = 0; 5205 for (i=0; i<nrecvs; i++){ 5206 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5207 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5208 for (j=0; j<nrows; j++) { 5209 b_othi[k+1] = b_othi[k] + rowlen[j]; 5210 len += rowlen[j]; k++; 5211 } 5212 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5213 } 5214 5215 /* allocate space for j and a arrrays of B_oth */ 5216 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5217 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5218 5219 /* j-array */ 5220 /*---------*/ 5221 /* post receives of j-array */ 5222 for (i=0; i<nrecvs; i++){ 5223 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5224 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5225 } 5226 5227 /* pack the outgoing message j-array */ 5228 k = 0; 5229 for (i=0; i<nsends; i++){ 5230 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5231 bufJ = bufj+sstartsj[i]; 5232 for (j=0; j<nrows; j++) { 5233 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5234 for (ll=0; ll<sbs; ll++){ 5235 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5236 for (l=0; l<ncols; l++){ 5237 *bufJ++ = cols[l]; 5238 } 5239 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5240 } 5241 } 5242 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5243 } 5244 5245 /* recvs and sends of j-array are completed */ 5246 i = nrecvs; 5247 while (i--) { 5248 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5249 } 5250 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5251 } else if (scall == MAT_REUSE_MATRIX){ 5252 sstartsj = *startsj_s; 5253 rstartsj = *startsj_r; 5254 bufa = *bufa_ptr; 5255 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5256 b_otha = b_oth->a; 5257 } else { 5258 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5259 } 5260 5261 /* a-array */ 5262 /*---------*/ 5263 /* post receives of a-array */ 5264 for (i=0; i<nrecvs; i++){ 5265 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5266 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5267 } 5268 5269 /* pack the outgoing message a-array */ 5270 k = 0; 5271 for (i=0; i<nsends; i++){ 5272 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5273 bufA = bufa+sstartsj[i]; 5274 for (j=0; j<nrows; j++) { 5275 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5276 for (ll=0; ll<sbs; ll++){ 5277 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5278 for (l=0; l<ncols; l++){ 5279 *bufA++ = vals[l]; 5280 } 5281 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5282 } 5283 } 5284 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5285 } 5286 /* recvs and sends of a-array are completed */ 5287 i = nrecvs; 5288 while (i--) { 5289 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5290 } 5291 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5292 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5293 5294 if (scall == MAT_INITIAL_MATRIX){ 5295 /* put together the new matrix */ 5296 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5297 5298 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5299 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5300 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5301 b_oth->free_a = PETSC_TRUE; 5302 b_oth->free_ij = PETSC_TRUE; 5303 b_oth->nonew = 0; 5304 5305 ierr = PetscFree(bufj);CHKERRQ(ierr); 5306 if (!startsj_s || !bufa_ptr){ 5307 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5308 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5309 } else { 5310 *startsj_s = sstartsj; 5311 *startsj_r = rstartsj; 5312 *bufa_ptr = bufa; 5313 } 5314 } 5315 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5316 PetscFunctionReturn(0); 5317 } 5318 5319 #undef __FUNCT__ 5320 #define __FUNCT__ "MatGetCommunicationStructs" 5321 /*@C 5322 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5323 5324 Not Collective 5325 5326 Input Parameters: 5327 . A - The matrix in mpiaij format 5328 5329 Output Parameter: 5330 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5331 . 
colmap - A map from global column index to local index into lvec 5332 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5333 5334 Level: developer 5335 5336 @*/ 5337 #if defined (PETSC_USE_CTABLE) 5338 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5339 #else 5340 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5341 #endif 5342 { 5343 Mat_MPIAIJ *a; 5344 5345 PetscFunctionBegin; 5346 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5347 PetscValidPointer(lvec, 2); 5348 PetscValidPointer(colmap, 3); 5349 PetscValidPointer(multScatter, 4); 5350 a = (Mat_MPIAIJ *) A->data; 5351 if (lvec) *lvec = a->lvec; 5352 if (colmap) *colmap = a->colmap; 5353 if (multScatter) *multScatter = a->Mvctx; 5354 PetscFunctionReturn(0); 5355 } 5356 5357 EXTERN_C_BEGIN 5358 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5359 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5360 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5361 EXTERN_C_END 5362 5363 #undef __FUNCT__ 5364 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5365 /* 5366 Computes (B'*A')' since computing B*A directly is untenable 5367 5368 n p p 5369 ( ) ( ) ( ) 5370 m ( A ) * n ( B ) = m ( C ) 5371 ( ) ( ) ( ) 5372 5373 */ 5374 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5375 { 5376 PetscErrorCode ierr; 5377 Mat At,Bt,Ct; 5378 5379 PetscFunctionBegin; 5380 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5381 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5382 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5383 ierr = MatDestroy(&At);CHKERRQ(ierr); 5384 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5385 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5386 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5387 PetscFunctionReturn(0); 5388 } 5389 5390 #undef __FUNCT__ 5391 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5392 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5393 { 5394 PetscErrorCode ierr; 5395 PetscInt m=A->rmap->n,n=B->cmap->n; 5396 Mat Cmat; 5397 5398 PetscFunctionBegin; 5399 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5400 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5401 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5402 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5403 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5404 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5405 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5406 *C = Cmat; 5407 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5408 PetscFunctionReturn(0); 5409 } 5410 5411 /* ----------------------------------------------------------------*/ 5412 #undef __FUNCT__ 5413 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5414 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5415 { 5416 PetscErrorCode ierr; 5417 5418 PetscFunctionBegin; 5419 if (scall == MAT_INITIAL_MATRIX){ 5420 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5421 } 5422 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5423 PetscFunctionReturn(0); 5424 } 5425 5426 EXTERN_C_BEGIN 5427 #if 
defined(PETSC_HAVE_MUMPS) 5428 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5429 #endif 5430 #if defined(PETSC_HAVE_PASTIX) 5431 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5432 #endif 5433 #if defined(PETSC_HAVE_SUPERLU_DIST) 5434 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5435 #endif 5436 #if defined(PETSC_HAVE_SPOOLES) 5437 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5438 #endif 5439 EXTERN_C_END 5440 5441 /*MC 5442 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5443 5444 Options Database Keys: 5445 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5446 5447 Level: beginner 5448 5449 .seealso: MatCreateMPIAIJ() 5450 M*/ 5451 5452 EXTERN_C_BEGIN 5453 #undef __FUNCT__ 5454 #define __FUNCT__ "MatCreate_MPIAIJ" 5455 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5456 { 5457 Mat_MPIAIJ *b; 5458 PetscErrorCode ierr; 5459 PetscMPIInt size; 5460 5461 PetscFunctionBegin; 5462 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5463 5464 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5465 B->data = (void*)b; 5466 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5467 B->rmap->bs = 1; 5468 B->assembled = PETSC_FALSE; 5469 5470 B->insertmode = NOT_SET_VALUES; 5471 b->size = size; 5472 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5473 5474 /* build cache for off array entries formed */ 5475 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5476 b->donotstash = PETSC_FALSE; 5477 b->colmap = 0; 5478 b->garray = 0; 5479 b->roworiented = PETSC_TRUE; 5480 5481 /* stuff used for matrix vector multiply */ 5482 b->lvec = PETSC_NULL; 5483 b->Mvctx = PETSC_NULL; 5484 5485 /* stuff for MatGetRow() */ 5486 b->rowindices = 0; 5487 b->rowvalues = 0; 5488 b->getrowactive = PETSC_FALSE; 5489 5490 #if defined(PETSC_HAVE_SPOOLES) 5491 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5492 "MatGetFactor_mpiaij_spooles", 5493 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5494 #endif 5495 #if defined(PETSC_HAVE_MUMPS) 5496 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5497 "MatGetFactor_aij_mumps", 5498 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5499 #endif 5500 #if defined(PETSC_HAVE_PASTIX) 5501 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5502 "MatGetFactor_mpiaij_pastix", 5503 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5504 #endif 5505 #if defined(PETSC_HAVE_SUPERLU_DIST) 5506 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5507 "MatGetFactor_mpiaij_superlu_dist", 5508 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5509 #endif 5510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5511 "MatStoreValues_MPIAIJ", 5512 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5513 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5514 "MatRetrieveValues_MPIAIJ", 5515 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5516 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5517 "MatGetDiagonalBlock_MPIAIJ", 5518 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5519 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5520 "MatIsTranspose_MPIAIJ", 5521 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5522 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5523 "MatMPIAIJSetPreallocation_MPIAIJ", 5524 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5525 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5526 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5527 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5528 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5529 "MatDiagonalScaleLocal_MPIAIJ", 5530 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5531 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5532 "MatConvert_MPIAIJ_MPIAIJPERM", 5533 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5534 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5535 "MatConvert_MPIAIJ_MPIAIJCRL", 5536 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5537 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5538 "MatConvert_MPIAIJ_MPISBAIJ", 5539 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5540 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5541 "MatMatMult_MPIDense_MPIAIJ", 5542 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5543 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5544 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5545 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5546 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5547 "MatMatMultNumeric_MPIDense_MPIAIJ", 5548 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5549 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5550 PetscFunctionReturn(0); 5551 } 5552 EXTERN_C_END 5553 5554 #undef __FUNCT__ 5555 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5556 /*@ 5557 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5558 and "off-diagonal" part of the matrix in CSR format. 5559 5560 Collective on MPI_Comm 5561 5562 Input Parameters: 5563 + comm - MPI communicator 5564 . m - number of local rows (Cannot be PETSC_DECIDE) 5565 . n - This value should be the same as the local size used in creating the 5566 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5567 calculated if N is given) For square matrices n is almost always m. 5568 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5569 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5570 . i - row indices for "diagonal" portion of matrix 5571 . j - column indices 5572 . a - matrix values 5573 . oi - row indices for "off-diagonal" portion of matrix 5574 . oj - column indices 5575 - oa - matrix values 5576 5577 Output Parameter: 5578 . mat - the matrix 5579 5580 Level: advanced 5581 5582 Notes: 5583 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5584 must free the arrays once the matrix has been destroyed and not before. 5585 5586 The i and j indices are 0 based 5587 5588 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5589 5590 This sets local rows and cannot be used to set off-processor values. 5591 5592 You cannot later use MatSetValues() to change values in this matrix. 
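   Example usage (a minimal sketch, not taken from the PETSc tests): assemble the 2 by 2 global matrix with
   rows (1 2) and (3 4) on a communicator comm of two processes, each owning one row and one column. Following
   the MatCreateSeqAIJWithArrays() calls in the implementation below, the "diagonal" column indices j[] are
   local to the diagonal block while the "off-diagonal" column indices oj[] are global.

       PetscErrorCode ierr;
       PetscMPIInt    rank;
       PetscInt       i[2] = {0,1},j[1],oi[2] = {0,1},oj[1];
       PetscScalar    a[1],oa[1];
       Mat            mat;

       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
       if (!rank) {j[0] = 0; a[0] = 1.0; oj[0] = 1; oa[0] = 2.0;}
       else       {j[0] = 0; a[0] = 4.0; oj[0] = 0; oa[0] = 3.0;}
       ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   The arrays must remain valid until mat has been destroyed.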
5593 5594 .keywords: matrix, aij, compressed row, sparse, parallel 5595 5596 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5597 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5598 @*/ 5599 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5600 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5601 { 5602 PetscErrorCode ierr; 5603 Mat_MPIAIJ *maij; 5604 5605 PetscFunctionBegin; 5606 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5607 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5608 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5609 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5610 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5611 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5612 maij = (Mat_MPIAIJ*) (*mat)->data; 5613 maij->donotstash = PETSC_TRUE; 5614 (*mat)->preallocated = PETSC_TRUE; 5615 5616 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5617 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5618 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5619 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5620 5621 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5622 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5623 5624 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5625 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5626 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5627 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5628 5629 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5630 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5631 PetscFunctionReturn(0); 5632 } 5633 5634 /* 5635 Special version for direct calls from Fortran 5636 */ 5637 #include <private/fortranimpl.h> 5638 5639 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5640 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5641 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5642 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5643 #endif 5644 5645 /* Change these macros so can be used in void function */ 5646 #undef CHKERRQ 5647 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5648 #undef SETERRQ2 5649 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5650 #undef SETERRQ 5651 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5652 5653 EXTERN_C_BEGIN 5654 #undef __FUNCT__ 5655 #define __FUNCT__ "matsetvaluesmpiaij_" 5656 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5657 { 5658 Mat mat = *mmat; 5659 PetscInt m = *mm, n = *mn; 5660 InsertMode addv = *maddv; 5661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5662 PetscScalar value; 5663 PetscErrorCode ierr; 5664 5665 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5666 if (mat->insertmode == NOT_SET_VALUES) { 5667 mat->insertmode = addv; 5668 } 5669 #if defined(PETSC_USE_DEBUG) 5670 else if (mat->insertmode != addv) { 5671 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5672 } 5673 #endif 5674 { 5675 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5676 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5677 PetscBool roworiented = aij->roworiented; 5678 5679 /* Some Variables required in the macro */ 5680 Mat A = aij->A; 5681 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5682 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5683 MatScalar *aa = a->a; 5684 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5685 Mat B = aij->B; 5686 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5687 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5688 MatScalar *ba = b->a; 5689 5690 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5691 PetscInt nonew = a->nonew; 5692 MatScalar *ap1,*ap2; 5693 5694 PetscFunctionBegin; 5695 for (i=0; i<m; i++) { 5696 if (im[i] < 0) continue; 5697 #if defined(PETSC_USE_DEBUG) 5698 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5699 #endif 5700 if (im[i] >= rstart && im[i] < rend) { 5701 row = im[i] - rstart; 5702 lastcol1 = -1; 5703 rp1 = aj + ai[row]; 5704 ap1 = aa + ai[row]; 5705 rmax1 = aimax[row]; 5706 nrow1 = ailen[row]; 5707 low1 = 0; 5708 high1 = nrow1; 5709 lastcol2 = -1; 5710 rp2 = bj + bi[row]; 5711 ap2 = ba + bi[row]; 5712 rmax2 = bimax[row]; 5713 nrow2 = bilen[row]; 5714 low2 = 0; 5715 high2 = nrow2; 5716 5717 for (j=0; j<n; j++) { 5718 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5719 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5720 if (in[j] >= cstart && in[j] < cend){ 5721 col = in[j] - cstart; 5722 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5723 } else if (in[j] < 0) continue; 5724 #if defined(PETSC_USE_DEBUG) 5725 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5726 #endif 5727 else { 5728 if (mat->was_assembled) { 5729 if (!aij->colmap) { 5730 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5731 } 5732 #if defined (PETSC_USE_CTABLE) 5733 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5734 col--; 5735 #else 5736 col = aij->colmap[in[j]] - 1; 5737 #endif 5738 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5739 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5740 col = in[j]; 5741 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5742 B = aij->B; 5743 b = (Mat_SeqAIJ*)B->data; 5744 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5745 rp2 = bj + bi[row]; 5746 ap2 = ba + bi[row]; 5747 rmax2 = bimax[row]; 5748 nrow2 = bilen[row]; 5749 low2 = 0; 5750 high2 = nrow2; 5751 bm = aij->B->rmap->n; 5752 ba = b->a; 5753 } 5754 } else col = in[j]; 5755 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5756 } 5757 } 5758 } else { 5759 if (!aij->donotstash) { 5760 if (roworiented) { 5761 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5762 } else { 5763 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5764 } 5765 } 5766 } 5767 }} 5768 PetscFunctionReturnVoid(); 5769 } 5770 EXTERN_C_END 5771 5772