1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = sqrt(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 PetscFunctionBegin; 432 /* code only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew = a->nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 506 } else if (in[j] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 509 #endif 510 else { 511 if (mat->was_assembled) { 512 if (!aij->colmap) { 513 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 517 col--; 518 #else 519 col = aij->colmap[in[j]] - 1; 520 #endif 521 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 col = in[j]; 524 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 525 B = aij->B; 526 b = (Mat_SeqAIJ*)B->data; 527 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 528 rp2 = bj + bi[row]; 529 ap2 = ba + bi[row]; 530 rmax2 = bimax[row]; 531 nrow2 = bilen[row]; 532 low2 = 0; 533 high2 = nrow2; 534 bm = aij->B->rmap->n; 535 ba = b->a; 536 } 537 } else col = in[j]; 538 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 539 } 540 } 541 } else { 542 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 543 if (!aij->donotstash) { 544 if (roworiented) { 545 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 546 } else { 547 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatGetValues_MPIAIJ" 557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 560 PetscErrorCode ierr; 561 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 562 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 563 564 PetscFunctionBegin; 565 for (i=0; i<m; i++) { 566 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 567 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 568 if (idxm[i] >= rstart && idxm[i] < rend) { 569 row = idxm[i] - rstart; 570 for (j=0; j<n; j++) { 571 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 572 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 573 if (idxn[j] >= cstart && idxn[j] < cend){ 574 col = idxn[j] - cstart; 575 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 576 } else { 577 if (!aij->colmap) { 578 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 579 } 580 #if defined (PETSC_USE_CTABLE) 581 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 582 col --; 583 #else 584 col = aij->colmap[idxn[j]] - 1; 585 #endif 586 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 587 else { 588 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else { 593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (aij->donotstash || mat->nooffprocentries) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 618 mat->insertmode = addv; /* in case this processor had no cache */ 619 620 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 621 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 622 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 632 PetscErrorCode ierr; 633 PetscMPIInt n; 634 PetscInt i,j,rstart,ncols,flg; 635 PetscInt *row,*col; 636 PetscBool other_disassembled; 637 PetscScalar *val; 638 InsertMode addv = mat->insertmode; 639 640 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 641 PetscFunctionBegin; 642 if (!aij->donotstash && !mat->nooffprocentries) { 643 while (1) { 644 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 645 if (!flg) break; 646 647 for (i=0; i<n;) { 648 /* Now identify the consecutive vals belonging to the same row */ 649 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 650 if (j < n) ncols = j-i; 651 else ncols = n-i; 652 /* Now assemble all these values with a single function call */ 653 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 654 i = j; 655 } 656 } 657 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 658 } 659 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 660 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 661 662 /* determine if any processor has disassembled, if so we must 663 also disassemble ourselfs, in order that we may reassemble. */ 664 /* 665 if nonzero structure of submatrix B cannot change then we know that 666 no processor disassembled thus we can skip this stuff 667 */ 668 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 669 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 670 if (mat->was_assembled && !other_disassembled) { 671 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 672 } 673 } 674 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 675 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 681 682 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 683 aij->rowvalues = 0; 684 685 /* used by MatAXPY() */ 686 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 687 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 688 689 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 690 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else { 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 876 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 877 { 878 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 879 PetscErrorCode ierr; 880 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 881 PetscInt i,*owners = A->rmap->range; 882 PetscInt *nprocs,j,idx,nsends; 883 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 884 PetscInt *rvalues,count,base,slen,*source; 885 PetscInt *lens,*lrows,*values,m; 886 MPI_Comm comm = ((PetscObject)A)->comm; 887 MPI_Request *send_waits,*recv_waits; 888 MPI_Status recv_status,*send_status; 889 const PetscScalar *xx; 890 PetscScalar *bb,*mask; 891 Vec xmask,lmask; 892 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 893 const PetscInt *aj, *ii,*ridx; 894 PetscScalar *aa; 895 #if defined(PETSC_DEBUG) 896 PetscBool found = PETSC_FALSE; 897 #endif 898 899 PetscFunctionBegin; 900 /* first count number of contributors to each processor */ 901 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 902 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 903 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 904 j = 0; 905 for (i=0; i<N; i++) { 906 if (lastidx > (idx = rows[i])) j = 0; 907 lastidx = idx; 908 for (; j<size; j++) { 909 if (idx >= owners[j] && idx < owners[j+1]) { 910 nprocs[2*j]++; 911 nprocs[2*j+1] = 1; 912 owner[i] = j; 913 #if defined(PETSC_DEBUG) 914 found = PETSC_TRUE; 915 #endif 916 break; 917 } 918 } 919 #if defined(PETSC_DEBUG) 920 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 921 found = PETSC_FALSE; 922 #endif 923 } 924 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 925 926 /* inform other processors of number of messages and max length*/ 927 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 928 929 /* post receives: */ 930 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 931 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 932 for (i=0; i<nrecvs; i++) { 933 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 934 } 935 936 /* do sends: 937 1) starts[i] gives the starting index in svalues for stuff going to 938 the ith processor 939 */ 940 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 941 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 942 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 943 starts[0] = 0; 944 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 945 for (i=0; i<N; i++) { 946 svalues[starts[owner[i]]++] = rows[i]; 947 } 948 949 starts[0] = 0; 950 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 951 count = 0; 952 for (i=0; i<size; i++) { 953 if (nprocs[2*i+1]) { 954 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 955 } 956 } 957 ierr = PetscFree(starts);CHKERRQ(ierr); 958 959 base = owners[rank]; 960 961 /* wait on receives */ 962 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 963 count = nrecvs; slen = 0; 964 while (count) { 965 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 966 /* unpack receives into our local space */ 967 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 968 source[imdex] = recv_status.MPI_SOURCE; 969 lens[imdex] = n; 970 slen += n; 971 count--; 972 } 973 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 974 975 /* move the data into the send scatter */ 976 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 977 count = 0; 978 for (i=0; i<nrecvs; i++) { 979 values = rvalues + i*nmax; 980 for (j=0; j<lens[i]; j++) { 981 lrows[count++] = values[j] - base; 982 } 983 } 984 ierr = PetscFree(rvalues);CHKERRQ(ierr); 985 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 986 ierr = PetscFree(owner);CHKERRQ(ierr); 987 ierr = PetscFree(nprocs);CHKERRQ(ierr); 988 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 989 990 /* zero diagonal part of matrix */ 991 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 992 993 /* handle off diagonal part of matrix */ 994 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 995 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 996 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 997 for (i=0; i<slen; i++) { 998 bb[lrows[i]] = 1; 999 } 1000 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1001 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1003 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1004 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1007 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1008 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1009 1010 /* remove zeroed rows of off diagonal matrix */ 1011 ii = aij->i; 1012 for (i=0; i<slen; i++) { 1013 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1014 } 1015 1016 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1017 if (aij->compressedrow.use){ 1018 m = aij->compressedrow.nrows; 1019 ii = aij->compressedrow.i; 1020 ridx = aij->compressedrow.rindex; 1021 for (i=0; i<m; i++){ 1022 n = ii[i+1] - ii[i]; 1023 aj = aij->j + ii[i]; 1024 aa = aij->a + ii[i]; 1025 1026 for (j=0; j<n; j++) { 1027 if (PetscAbsScalar(mask[*aj])) { 1028 bb[*ridx] -= *aa*xx[*aj]; 1029 *aa = 0.0; 1030 } 1031 aa++; 1032 aj++; 1033 } 1034 ridx++; 1035 } 1036 } else { /* do not use compressed row format */ 1037 m = l->B->rmap->n; 1038 for (i=0; i<m; i++) { 1039 n = ii[i+1] - ii[i]; 1040 aj = aij->j + ii[i]; 1041 aa = aij->a + ii[i]; 1042 for (j=0; j<n; j++) { 1043 if (PetscAbsScalar(mask[*aj])) { 1044 bb[i] -= *aa*xx[*aj]; 1045 *aa = 0.0; 1046 } 1047 aa++; 1048 aj++; 1049 } 1050 } 1051 } 1052 1053 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1054 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1056 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1057 ierr = PetscFree(lrows);CHKERRQ(ierr); 1058 1059 /* wait on sends */ 1060 if (nsends) { 1061 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1062 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1063 ierr = PetscFree(send_status);CHKERRQ(ierr); 1064 } 1065 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1066 ierr = PetscFree(svalues);CHKERRQ(ierr); 1067 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatMult_MPIAIJ" 1073 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1074 { 1075 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1076 PetscErrorCode ierr; 1077 PetscInt nt; 1078 1079 PetscFunctionBegin; 1080 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1081 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1082 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1083 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1084 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1091 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1092 { 1093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1103 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1111 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1118 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1119 { 1120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1121 PetscErrorCode ierr; 1122 PetscBool merged; 1123 1124 PetscFunctionBegin; 1125 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1126 /* do nondiagonal part */ 1127 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1128 if (!merged) { 1129 /* send it on its way */ 1130 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1131 /* do local part */ 1132 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1133 /* receive remote parts: note this assumes the values are not actually */ 1134 /* added in yy until the next line, */ 1135 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1136 } else { 1137 /* do local part */ 1138 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1139 /* send it on its way */ 1140 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1141 /* values actually were received in the Begin() but we need to call this nop */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } 1144 PetscFunctionReturn(0); 1145 } 1146 1147 EXTERN_C_BEGIN 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1150 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1151 { 1152 MPI_Comm comm; 1153 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1154 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1155 IS Me,Notme; 1156 PetscErrorCode ierr; 1157 PetscInt M,N,first,last,*notme,i; 1158 PetscMPIInt size; 1159 1160 PetscFunctionBegin; 1161 1162 /* Easy test: symmetric diagonal block */ 1163 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1164 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1165 if (!*f) PetscFunctionReturn(0); 1166 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1167 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1168 if (size == 1) PetscFunctionReturn(0); 1169 1170 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1171 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1172 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1173 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1174 for (i=0; i<first; i++) notme[i] = i; 1175 for (i=last; i<M; i++) notme[i-last+first] = i; 1176 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1177 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1178 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1179 Aoff = Aoffs[0]; 1180 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1181 Boff = Boffs[0]; 1182 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1183 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1184 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1185 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1186 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1187 ierr = PetscFree(notme);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 EXTERN_C_END 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1194 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1195 { 1196 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 /* do nondiagonal part */ 1201 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1202 /* send it on its way */ 1203 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1204 /* do local part */ 1205 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1206 /* receive remote parts */ 1207 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 /* 1212 This only works correctly for square matrices where the subblock A->A is the 1213 diagonal block 1214 */ 1215 #undef __FUNCT__ 1216 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1217 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1218 { 1219 PetscErrorCode ierr; 1220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1221 1222 PetscFunctionBegin; 1223 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1224 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1225 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatScale_MPIAIJ" 1231 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1232 { 1233 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1234 PetscErrorCode ierr; 1235 1236 PetscFunctionBegin; 1237 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1238 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatDestroy_MPIAIJ" 1244 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1245 { 1246 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 #if defined(PETSC_USE_LOG) 1251 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1252 #endif 1253 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1254 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1255 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1256 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1257 #if defined (PETSC_USE_CTABLE) 1258 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1259 #else 1260 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1261 #endif 1262 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1263 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1264 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1265 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1266 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1267 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1268 1269 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1283 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1286 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1287 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1288 PetscErrorCode ierr; 1289 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1290 int fd; 1291 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1292 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1293 PetscScalar *column_values; 1294 PetscInt message_count,flowcontrolcount; 1295 1296 PetscFunctionBegin; 1297 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1298 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1299 nz = A->nz + B->nz; 1300 if (!rank) { 1301 header[0] = MAT_FILE_CLASSID; 1302 header[1] = mat->rmap->N; 1303 header[2] = mat->cmap->N; 1304 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1305 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1306 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1307 /* get largest number of rows any processor has */ 1308 rlen = mat->rmap->n; 1309 range = mat->rmap->range; 1310 for (i=1; i<size; i++) { 1311 rlen = PetscMax(rlen,range[i+1] - range[i]); 1312 } 1313 } else { 1314 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1315 rlen = mat->rmap->n; 1316 } 1317 1318 /* load up the local row counts */ 1319 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1320 for (i=0; i<mat->rmap->n; i++) { 1321 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1322 } 1323 1324 /* store the row lengths to the file */ 1325 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1326 if (!rank) { 1327 MPI_Status status; 1328 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1329 for (i=1; i<size; i++) { 1330 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1331 rlen = range[i+1] - range[i]; 1332 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1333 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1334 } 1335 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1336 } else { 1337 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1338 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1339 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1340 } 1341 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1342 1343 /* load up the local column indices */ 1344 nzmax = nz; /* )th processor needs space a largest processor needs */ 1345 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1346 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1347 cnt = 0; 1348 for (i=0; i<mat->rmap->n; i++) { 1349 for (j=B->i[i]; j<B->i[i+1]; j++) { 1350 if ( (col = garray[B->j[j]]) > cstart) break; 1351 column_indices[cnt++] = col; 1352 } 1353 for (k=A->i[i]; k<A->i[i+1]; k++) { 1354 column_indices[cnt++] = A->j[k] + cstart; 1355 } 1356 for (; j<B->i[i+1]; j++) { 1357 column_indices[cnt++] = garray[B->j[j]]; 1358 } 1359 } 1360 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1361 1362 /* store the column indices to the file */ 1363 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1364 if (!rank) { 1365 MPI_Status status; 1366 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1367 for (i=1; i<size; i++) { 1368 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1369 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1370 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1371 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1372 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1373 } 1374 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1375 } else { 1376 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1377 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1378 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1380 } 1381 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1382 1383 /* load up the local column values */ 1384 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1385 cnt = 0; 1386 for (i=0; i<mat->rmap->n; i++) { 1387 for (j=B->i[i]; j<B->i[i+1]; j++) { 1388 if ( garray[B->j[j]] > cstart) break; 1389 column_values[cnt++] = B->a[j]; 1390 } 1391 for (k=A->i[i]; k<A->i[i+1]; k++) { 1392 column_values[cnt++] = A->a[k]; 1393 } 1394 for (; j<B->i[i+1]; j++) { 1395 column_values[cnt++] = B->a[j]; 1396 } 1397 } 1398 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1399 1400 /* store the column values to the file */ 1401 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1402 if (!rank) { 1403 MPI_Status status; 1404 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1405 for (i=1; i<size; i++) { 1406 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1407 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1408 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1409 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1410 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1411 } 1412 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1413 } else { 1414 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1415 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1416 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1418 } 1419 ierr = PetscFree(column_values);CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1425 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1426 { 1427 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1428 PetscErrorCode ierr; 1429 PetscMPIInt rank = aij->rank,size = aij->size; 1430 PetscBool isdraw,iascii,isbinary; 1431 PetscViewer sviewer; 1432 PetscViewerFormat format; 1433 1434 PetscFunctionBegin; 1435 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1436 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1437 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1438 if (iascii) { 1439 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1440 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1441 MatInfo info; 1442 PetscBool inodes; 1443 1444 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1445 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1446 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1447 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1448 if (!inodes) { 1449 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1450 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1451 } else { 1452 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1453 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1454 } 1455 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1456 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1457 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1458 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1459 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1462 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1465 PetscInt inodecount,inodelimit,*inodes; 1466 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1467 if (inodes) { 1468 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1469 } else { 1470 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1471 } 1472 PetscFunctionReturn(0); 1473 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1474 PetscFunctionReturn(0); 1475 } 1476 } else if (isbinary) { 1477 if (size == 1) { 1478 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1479 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1480 } else { 1481 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1482 } 1483 PetscFunctionReturn(0); 1484 } else if (isdraw) { 1485 PetscDraw draw; 1486 PetscBool isnull; 1487 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1488 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1489 } 1490 1491 if (size == 1) { 1492 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1493 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1494 } else { 1495 /* assemble the entire matrix onto first processor. */ 1496 Mat A; 1497 Mat_SeqAIJ *Aloc; 1498 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1499 MatScalar *a; 1500 1501 if (mat->rmap->N > 1024) { 1502 PetscBool flg = PETSC_FALSE; 1503 1504 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1505 if (!flg) { 1506 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1507 } 1508 } 1509 1510 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1511 if (!rank) { 1512 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1513 } else { 1514 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1515 } 1516 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1517 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1518 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1519 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1520 1521 /* copy over the A part */ 1522 Aloc = (Mat_SeqAIJ*)aij->A->data; 1523 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1524 row = mat->rmap->rstart; 1525 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1526 for (i=0; i<m; i++) { 1527 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1528 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1529 } 1530 aj = Aloc->j; 1531 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1532 1533 /* copy over the B part */ 1534 Aloc = (Mat_SeqAIJ*)aij->B->data; 1535 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1536 row = mat->rmap->rstart; 1537 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1538 ct = cols; 1539 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1540 for (i=0; i<m; i++) { 1541 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1542 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1543 } 1544 ierr = PetscFree(ct);CHKERRQ(ierr); 1545 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1546 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1547 /* 1548 Everyone has to call to draw the matrix since the graphics waits are 1549 synchronized across all processors that share the PetscDraw object 1550 */ 1551 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1552 if (!rank) { 1553 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1554 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1555 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1556 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1557 } 1558 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1559 ierr = MatDestroy(&A);CHKERRQ(ierr); 1560 } 1561 PetscFunctionReturn(0); 1562 } 1563 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MatView_MPIAIJ" 1566 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1567 { 1568 PetscErrorCode ierr; 1569 PetscBool iascii,isdraw,issocket,isbinary; 1570 1571 PetscFunctionBegin; 1572 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1573 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1574 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1575 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1576 if (iascii || isdraw || isbinary || issocket) { 1577 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1578 } else { 1579 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatSOR_MPIAIJ" 1586 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1587 { 1588 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1589 PetscErrorCode ierr; 1590 Vec bb1 = 0; 1591 PetscBool hasop; 1592 1593 PetscFunctionBegin; 1594 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1595 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1596 } 1597 1598 if (flag == SOR_APPLY_UPPER) { 1599 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1600 PetscFunctionReturn(0); 1601 } 1602 1603 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1604 if (flag & SOR_ZERO_INITIAL_GUESS) { 1605 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1606 its--; 1607 } 1608 1609 while (its--) { 1610 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1611 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 1613 /* update rhs: bb1 = bb - B*x */ 1614 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1615 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1616 1617 /* local sweep */ 1618 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1619 } 1620 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1621 if (flag & SOR_ZERO_INITIAL_GUESS) { 1622 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1623 its--; 1624 } 1625 while (its--) { 1626 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1627 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 1629 /* update rhs: bb1 = bb - B*x */ 1630 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1631 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1632 1633 /* local sweep */ 1634 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1635 } 1636 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1637 if (flag & SOR_ZERO_INITIAL_GUESS) { 1638 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1639 its--; 1640 } 1641 while (its--) { 1642 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 1645 /* update rhs: bb1 = bb - B*x */ 1646 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1647 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1648 1649 /* local sweep */ 1650 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1651 } 1652 } else if (flag & SOR_EISENSTAT) { 1653 Vec xx1; 1654 1655 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1657 1658 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1659 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 if (!mat->diag) { 1661 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1662 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1663 } 1664 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1665 if (hasop) { 1666 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1667 } else { 1668 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1669 } 1670 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1671 1672 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1673 1674 /* local sweep */ 1675 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1676 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1677 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1678 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1679 1680 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatPermute_MPIAIJ" 1686 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1687 { 1688 MPI_Comm comm,pcomm; 1689 PetscInt first,local_size,nrows; 1690 const PetscInt *rows; 1691 PetscMPIInt size; 1692 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1693 PetscErrorCode ierr; 1694 1695 PetscFunctionBegin; 1696 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1697 /* make a collective version of 'rowp' */ 1698 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1699 if (pcomm==comm) { 1700 crowp = rowp; 1701 } else { 1702 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1703 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1704 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1705 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1706 } 1707 /* collect the global row permutation and invert it */ 1708 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1709 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1710 if (pcomm!=comm) { 1711 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1712 } 1713 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1714 /* get the local target indices */ 1715 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1716 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1717 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1718 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1719 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1720 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1721 /* the column permutation is so much easier; 1722 make a local version of 'colp' and invert it */ 1723 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1724 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1725 if (size==1) { 1726 lcolp = colp; 1727 } else { 1728 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1729 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1730 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1731 } 1732 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1733 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1734 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1735 if (size>1) { 1736 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1737 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1738 } 1739 /* now we just get the submatrix */ 1740 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1741 /* clean up */ 1742 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1743 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1744 PetscFunctionReturn(0); 1745 } 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1749 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1750 { 1751 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1752 Mat A = mat->A,B = mat->B; 1753 PetscErrorCode ierr; 1754 PetscReal isend[5],irecv[5]; 1755 1756 PetscFunctionBegin; 1757 info->block_size = 1.0; 1758 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1759 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1760 isend[3] = info->memory; isend[4] = info->mallocs; 1761 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1762 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1763 isend[3] += info->memory; isend[4] += info->mallocs; 1764 if (flag == MAT_LOCAL) { 1765 info->nz_used = isend[0]; 1766 info->nz_allocated = isend[1]; 1767 info->nz_unneeded = isend[2]; 1768 info->memory = isend[3]; 1769 info->mallocs = isend[4]; 1770 } else if (flag == MAT_GLOBAL_MAX) { 1771 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1779 info->nz_used = irecv[0]; 1780 info->nz_allocated = irecv[1]; 1781 info->nz_unneeded = irecv[2]; 1782 info->memory = irecv[3]; 1783 info->mallocs = irecv[4]; 1784 } 1785 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1786 info->fill_ratio_needed = 0; 1787 info->factor_mallocs = 0; 1788 1789 PetscFunctionReturn(0); 1790 } 1791 1792 #undef __FUNCT__ 1793 #define __FUNCT__ "MatSetOption_MPIAIJ" 1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1795 { 1796 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 switch (op) { 1801 case MAT_NEW_NONZERO_LOCATIONS: 1802 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1803 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1804 case MAT_KEEP_NONZERO_PATTERN: 1805 case MAT_NEW_NONZERO_LOCATION_ERR: 1806 case MAT_USE_INODES: 1807 case MAT_IGNORE_ZERO_ENTRIES: 1808 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1809 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1810 break; 1811 case MAT_ROW_ORIENTED: 1812 a->roworiented = flg; 1813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1814 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1815 break; 1816 case MAT_NEW_DIAGONALS: 1817 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1818 break; 1819 case MAT_IGNORE_OFF_PROC_ENTRIES: 1820 a->donotstash = PETSC_TRUE; 1821 break; 1822 case MAT_SPD: 1823 A->spd_set = PETSC_TRUE; 1824 A->spd = flg; 1825 if (flg) { 1826 A->symmetric = PETSC_TRUE; 1827 A->structurally_symmetric = PETSC_TRUE; 1828 A->symmetric_set = PETSC_TRUE; 1829 A->structurally_symmetric_set = PETSC_TRUE; 1830 } 1831 break; 1832 case MAT_SYMMETRIC: 1833 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1834 break; 1835 case MAT_STRUCTURALLY_SYMMETRIC: 1836 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1837 break; 1838 case MAT_HERMITIAN: 1839 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1840 break; 1841 case MAT_SYMMETRY_ETERNAL: 1842 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1843 break; 1844 default: 1845 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1846 } 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatGetRow_MPIAIJ" 1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1853 { 1854 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1855 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1856 PetscErrorCode ierr; 1857 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1858 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1859 PetscInt *cmap,*idx_p; 1860 1861 PetscFunctionBegin; 1862 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1863 mat->getrowactive = PETSC_TRUE; 1864 1865 if (!mat->rowvalues && (idx || v)) { 1866 /* 1867 allocate enough space to hold information from the longest row. 1868 */ 1869 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1870 PetscInt max = 1,tmp; 1871 for (i=0; i<matin->rmap->n; i++) { 1872 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1873 if (max < tmp) { max = tmp; } 1874 } 1875 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1876 } 1877 1878 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1879 lrow = row - rstart; 1880 1881 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1882 if (!v) {pvA = 0; pvB = 0;} 1883 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1884 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1885 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1886 nztot = nzA + nzB; 1887 1888 cmap = mat->garray; 1889 if (v || idx) { 1890 if (nztot) { 1891 /* Sort by increasing column numbers, assuming A and B already sorted */ 1892 PetscInt imark = -1; 1893 if (v) { 1894 *v = v_p = mat->rowvalues; 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1897 else break; 1898 } 1899 imark = i; 1900 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1901 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1902 } 1903 if (idx) { 1904 *idx = idx_p = mat->rowindices; 1905 if (imark > -1) { 1906 for (i=0; i<imark; i++) { 1907 idx_p[i] = cmap[cworkB[i]]; 1908 } 1909 } else { 1910 for (i=0; i<nzB; i++) { 1911 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1912 else break; 1913 } 1914 imark = i; 1915 } 1916 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1917 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1918 } 1919 } else { 1920 if (idx) *idx = 0; 1921 if (v) *v = 0; 1922 } 1923 } 1924 *nz = nztot; 1925 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1926 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1932 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1933 { 1934 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1935 1936 PetscFunctionBegin; 1937 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1938 aij->getrowactive = PETSC_FALSE; 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatNorm_MPIAIJ" 1944 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1945 { 1946 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1947 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1948 PetscErrorCode ierr; 1949 PetscInt i,j,cstart = mat->cmap->rstart; 1950 PetscReal sum = 0.0; 1951 MatScalar *v; 1952 1953 PetscFunctionBegin; 1954 if (aij->size == 1) { 1955 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1956 } else { 1957 if (type == NORM_FROBENIUS) { 1958 v = amat->a; 1959 for (i=0; i<amat->nz; i++) { 1960 #if defined(PETSC_USE_COMPLEX) 1961 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1962 #else 1963 sum += (*v)*(*v); v++; 1964 #endif 1965 } 1966 v = bmat->a; 1967 for (i=0; i<bmat->nz; i++) { 1968 #if defined(PETSC_USE_COMPLEX) 1969 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1970 #else 1971 sum += (*v)*(*v); v++; 1972 #endif 1973 } 1974 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1975 *norm = sqrt(*norm); 1976 } else if (type == NORM_1) { /* max column norm */ 1977 PetscReal *tmp,*tmp2; 1978 PetscInt *jj,*garray = aij->garray; 1979 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1980 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1981 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1982 *norm = 0.0; 1983 v = amat->a; jj = amat->j; 1984 for (j=0; j<amat->nz; j++) { 1985 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1986 } 1987 v = bmat->a; jj = bmat->j; 1988 for (j=0; j<bmat->nz; j++) { 1989 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1990 } 1991 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1992 for (j=0; j<mat->cmap->N; j++) { 1993 if (tmp2[j] > *norm) *norm = tmp2[j]; 1994 } 1995 ierr = PetscFree(tmp);CHKERRQ(ierr); 1996 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1997 } else if (type == NORM_INFINITY) { /* max row norm */ 1998 PetscReal ntemp = 0.0; 1999 for (j=0; j<aij->A->rmap->n; j++) { 2000 v = amat->a + amat->i[j]; 2001 sum = 0.0; 2002 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2003 sum += PetscAbsScalar(*v); v++; 2004 } 2005 v = bmat->a + bmat->i[j]; 2006 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2007 sum += PetscAbsScalar(*v); v++; 2008 } 2009 if (sum > ntemp) ntemp = sum; 2010 } 2011 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2012 } else { 2013 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2014 } 2015 } 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatTranspose_MPIAIJ" 2021 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2022 { 2023 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2024 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2025 PetscErrorCode ierr; 2026 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2027 PetscInt cstart=A->cmap->rstart,ncol; 2028 Mat B; 2029 MatScalar *array; 2030 2031 PetscFunctionBegin; 2032 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2033 2034 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2035 ai = Aloc->i; aj = Aloc->j; 2036 bi = Bloc->i; bj = Bloc->j; 2037 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2038 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2039 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2040 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2041 for (i=0; i<ai[ma]; i++){ 2042 d_nnz[aj[i]] ++; 2043 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2044 } 2045 2046 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2047 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2048 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2049 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2050 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2051 } else { 2052 B = *matout; 2053 } 2054 2055 /* copy over the A part */ 2056 array = Aloc->a; 2057 row = A->rmap->rstart; 2058 for (i=0; i<ma; i++) { 2059 ncol = ai[i+1]-ai[i]; 2060 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2061 row++; array += ncol; aj += ncol; 2062 } 2063 aj = Aloc->j; 2064 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2065 2066 /* copy over the B part */ 2067 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2068 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2069 array = Bloc->a; 2070 row = A->rmap->rstart; 2071 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2072 cols_tmp = cols; 2073 for (i=0; i<mb; i++) { 2074 ncol = bi[i+1]-bi[i]; 2075 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2076 row++; array += ncol; cols_tmp += ncol; 2077 } 2078 ierr = PetscFree(cols);CHKERRQ(ierr); 2079 2080 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2083 *matout = B; 2084 } else { 2085 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2086 } 2087 PetscFunctionReturn(0); 2088 } 2089 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2092 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2093 { 2094 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2095 Mat a = aij->A,b = aij->B; 2096 PetscErrorCode ierr; 2097 PetscInt s1,s2,s3; 2098 2099 PetscFunctionBegin; 2100 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2101 if (rr) { 2102 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2103 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2104 /* Overlap communication with computation. */ 2105 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2106 } 2107 if (ll) { 2108 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2109 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2110 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2111 } 2112 /* scale the diagonal block */ 2113 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2114 2115 if (rr) { 2116 /* Do a scatter end and then right scale the off-diagonal block */ 2117 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2119 } 2120 2121 PetscFunctionReturn(0); 2122 } 2123 2124 #undef __FUNCT__ 2125 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2126 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2127 { 2128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2129 PetscErrorCode ierr; 2130 2131 PetscFunctionBegin; 2132 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2133 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2134 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2135 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2136 PetscFunctionReturn(0); 2137 } 2138 #undef __FUNCT__ 2139 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2140 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2141 { 2142 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2143 PetscErrorCode ierr; 2144 2145 PetscFunctionBegin; 2146 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatEqual_MPIAIJ" 2152 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2153 { 2154 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2155 Mat a,b,c,d; 2156 PetscBool flg; 2157 PetscErrorCode ierr; 2158 2159 PetscFunctionBegin; 2160 a = matA->A; b = matA->B; 2161 c = matB->A; d = matB->B; 2162 2163 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2164 if (flg) { 2165 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2166 } 2167 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2168 PetscFunctionReturn(0); 2169 } 2170 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatCopy_MPIAIJ" 2173 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2174 { 2175 PetscErrorCode ierr; 2176 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2177 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2178 2179 PetscFunctionBegin; 2180 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2181 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2182 /* because of the column compression in the off-processor part of the matrix a->B, 2183 the number of columns in a->B and b->B may be different, hence we cannot call 2184 the MatCopy() directly on the two parts. If need be, we can provide a more 2185 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2186 then copying the submatrices */ 2187 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2188 } else { 2189 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2190 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2191 } 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2197 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2198 { 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "MatAXPY_MPIAIJ" 2208 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2209 { 2210 PetscErrorCode ierr; 2211 PetscInt i; 2212 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2213 PetscBLASInt bnz,one=1; 2214 Mat_SeqAIJ *x,*y; 2215 2216 PetscFunctionBegin; 2217 if (str == SAME_NONZERO_PATTERN) { 2218 PetscScalar alpha = a; 2219 x = (Mat_SeqAIJ *)xx->A->data; 2220 y = (Mat_SeqAIJ *)yy->A->data; 2221 bnz = PetscBLASIntCast(x->nz); 2222 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2223 x = (Mat_SeqAIJ *)xx->B->data; 2224 y = (Mat_SeqAIJ *)yy->B->data; 2225 bnz = PetscBLASIntCast(x->nz); 2226 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2227 } else if (str == SUBSET_NONZERO_PATTERN) { 2228 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2229 2230 x = (Mat_SeqAIJ *)xx->B->data; 2231 y = (Mat_SeqAIJ *)yy->B->data; 2232 if (y->xtoy && y->XtoY != xx->B) { 2233 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2234 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2235 } 2236 if (!y->xtoy) { /* get xtoy */ 2237 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2238 y->XtoY = xx->B; 2239 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2240 } 2241 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2242 } else { 2243 Mat B; 2244 PetscInt *nnz_d,*nnz_o; 2245 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2246 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2247 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2248 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2249 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2250 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2251 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2253 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2254 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2255 ierr = MatHeaderReplace(Y,B); 2256 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatConjugate_MPIAIJ" 2266 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2267 { 2268 #if defined(PETSC_USE_COMPLEX) 2269 PetscErrorCode ierr; 2270 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2271 2272 PetscFunctionBegin; 2273 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2274 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2275 #else 2276 PetscFunctionBegin; 2277 #endif 2278 PetscFunctionReturn(0); 2279 } 2280 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatRealPart_MPIAIJ" 2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2284 { 2285 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2286 PetscErrorCode ierr; 2287 2288 PetscFunctionBegin; 2289 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2290 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2296 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2297 { 2298 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2299 PetscErrorCode ierr; 2300 2301 PetscFunctionBegin; 2302 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2303 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #ifdef PETSC_HAVE_PBGL 2308 2309 #include <boost/parallel/mpi/bsp_process_group.hpp> 2310 #include <boost/graph/distributed/ilu_default_graph.hpp> 2311 #include <boost/graph/distributed/ilu_0_block.hpp> 2312 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2313 #include <boost/graph/distributed/petsc/interface.hpp> 2314 #include <boost/multi_array.hpp> 2315 #include <boost/parallel/distributed_property_map->hpp> 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2319 /* 2320 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2321 */ 2322 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2323 { 2324 namespace petsc = boost::distributed::petsc; 2325 2326 namespace graph_dist = boost::graph::distributed; 2327 using boost::graph::distributed::ilu_default::process_group_type; 2328 using boost::graph::ilu_permuted; 2329 2330 PetscBool row_identity, col_identity; 2331 PetscContainer c; 2332 PetscInt m, n, M, N; 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2337 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2338 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2339 if (!row_identity || !col_identity) { 2340 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2341 } 2342 2343 process_group_type pg; 2344 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2345 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2346 lgraph_type& level_graph = *lgraph_p; 2347 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2348 2349 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2350 ilu_permuted(level_graph); 2351 2352 /* put together the new matrix */ 2353 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2354 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2355 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2356 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2357 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2358 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2359 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 2361 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2362 ierr = PetscContainerSetPointer(c, lgraph_p); 2363 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2364 ierr = PetscContainerDestroy(&c); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2370 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2371 { 2372 PetscFunctionBegin; 2373 PetscFunctionReturn(0); 2374 } 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatSolve_MPIAIJ" 2378 /* 2379 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2380 */ 2381 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2382 { 2383 namespace graph_dist = boost::graph::distributed; 2384 2385 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2386 lgraph_type* lgraph_p; 2387 PetscContainer c; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2392 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2393 ierr = VecCopy(b, x);CHKERRQ(ierr); 2394 2395 PetscScalar* array_x; 2396 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2397 PetscInt sx; 2398 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2399 2400 PetscScalar* array_b; 2401 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2402 PetscInt sb; 2403 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2404 2405 lgraph_type& level_graph = *lgraph_p; 2406 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2407 2408 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2409 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2410 ref_x(array_x, boost::extents[num_vertices(graph)]); 2411 2412 typedef boost::iterator_property_map<array_ref_type::iterator, 2413 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2414 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2415 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2416 2417 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2418 2419 PetscFunctionReturn(0); 2420 } 2421 #endif 2422 2423 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2424 PetscInt nzlocal,nsends,nrecvs; 2425 PetscMPIInt *send_rank,*recv_rank; 2426 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2427 PetscScalar *sbuf_a,**rbuf_a; 2428 PetscErrorCode (*Destroy)(Mat); 2429 } Mat_Redundant; 2430 2431 #undef __FUNCT__ 2432 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2433 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2434 { 2435 PetscErrorCode ierr; 2436 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2437 PetscInt i; 2438 2439 PetscFunctionBegin; 2440 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2441 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2442 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2443 for (i=0; i<redund->nrecvs; i++){ 2444 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2445 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2446 } 2447 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2448 ierr = PetscFree(redund);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatDestroy_MatRedundant" 2454 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2455 { 2456 PetscErrorCode ierr; 2457 PetscContainer container; 2458 Mat_Redundant *redund=PETSC_NULL; 2459 2460 PetscFunctionBegin; 2461 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2462 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2463 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2464 A->ops->destroy = redund->Destroy; 2465 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2466 if (A->ops->destroy) { 2467 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2468 } 2469 PetscFunctionReturn(0); 2470 } 2471 2472 #undef __FUNCT__ 2473 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2474 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2475 { 2476 PetscMPIInt rank,size; 2477 MPI_Comm comm=((PetscObject)mat)->comm; 2478 PetscErrorCode ierr; 2479 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2480 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2481 PetscInt *rowrange=mat->rmap->range; 2482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2483 Mat A=aij->A,B=aij->B,C=*matredundant; 2484 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2485 PetscScalar *sbuf_a; 2486 PetscInt nzlocal=a->nz+b->nz; 2487 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2488 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2489 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2490 MatScalar *aworkA,*aworkB; 2491 PetscScalar *vals; 2492 PetscMPIInt tag1,tag2,tag3,imdex; 2493 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2494 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2495 MPI_Status recv_status,*send_status; 2496 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2497 PetscInt **rbuf_j=PETSC_NULL; 2498 PetscScalar **rbuf_a=PETSC_NULL; 2499 Mat_Redundant *redund=PETSC_NULL; 2500 PetscContainer container; 2501 2502 PetscFunctionBegin; 2503 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2504 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2505 2506 if (reuse == MAT_REUSE_MATRIX) { 2507 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2508 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2509 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2510 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2511 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2512 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2513 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2514 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2515 2516 nsends = redund->nsends; 2517 nrecvs = redund->nrecvs; 2518 send_rank = redund->send_rank; 2519 recv_rank = redund->recv_rank; 2520 sbuf_nz = redund->sbuf_nz; 2521 rbuf_nz = redund->rbuf_nz; 2522 sbuf_j = redund->sbuf_j; 2523 sbuf_a = redund->sbuf_a; 2524 rbuf_j = redund->rbuf_j; 2525 rbuf_a = redund->rbuf_a; 2526 } 2527 2528 if (reuse == MAT_INITIAL_MATRIX){ 2529 PetscMPIInt subrank,subsize; 2530 PetscInt nleftover,np_subcomm; 2531 /* get the destination processors' id send_rank, nsends and nrecvs */ 2532 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2533 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2534 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2535 np_subcomm = size/nsubcomm; 2536 nleftover = size - nsubcomm*np_subcomm; 2537 nsends = 0; nrecvs = 0; 2538 for (i=0; i<size; i++){ /* i=rank*/ 2539 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2540 send_rank[nsends] = i; nsends++; 2541 recv_rank[nrecvs++] = i; 2542 } 2543 } 2544 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2545 i = size-nleftover-1; 2546 j = 0; 2547 while (j < nsubcomm - nleftover){ 2548 send_rank[nsends++] = i; 2549 i--; j++; 2550 } 2551 } 2552 2553 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2554 for (i=0; i<nleftover; i++){ 2555 recv_rank[nrecvs++] = size-nleftover+i; 2556 } 2557 } 2558 2559 /* allocate sbuf_j, sbuf_a */ 2560 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2561 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2562 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2563 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2564 2565 /* copy mat's local entries into the buffers */ 2566 if (reuse == MAT_INITIAL_MATRIX){ 2567 rownz_max = 0; 2568 rptr = sbuf_j; 2569 cols = sbuf_j + rend-rstart + 1; 2570 vals = sbuf_a; 2571 rptr[0] = 0; 2572 for (i=0; i<rend-rstart; i++){ 2573 row = i + rstart; 2574 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2575 ncols = nzA + nzB; 2576 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2577 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2578 /* load the column indices for this row into cols */ 2579 lwrite = 0; 2580 for (l=0; l<nzB; l++) { 2581 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2582 vals[lwrite] = aworkB[l]; 2583 cols[lwrite++] = ctmp; 2584 } 2585 } 2586 for (l=0; l<nzA; l++){ 2587 vals[lwrite] = aworkA[l]; 2588 cols[lwrite++] = cstart + cworkA[l]; 2589 } 2590 for (l=0; l<nzB; l++) { 2591 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2592 vals[lwrite] = aworkB[l]; 2593 cols[lwrite++] = ctmp; 2594 } 2595 } 2596 vals += ncols; 2597 cols += ncols; 2598 rptr[i+1] = rptr[i] + ncols; 2599 if (rownz_max < ncols) rownz_max = ncols; 2600 } 2601 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2602 } else { /* only copy matrix values into sbuf_a */ 2603 rptr = sbuf_j; 2604 vals = sbuf_a; 2605 rptr[0] = 0; 2606 for (i=0; i<rend-rstart; i++){ 2607 row = i + rstart; 2608 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2609 ncols = nzA + nzB; 2610 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2611 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2612 lwrite = 0; 2613 for (l=0; l<nzB; l++) { 2614 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2615 } 2616 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2617 for (l=0; l<nzB; l++) { 2618 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2619 } 2620 vals += ncols; 2621 rptr[i+1] = rptr[i] + ncols; 2622 } 2623 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2624 2625 /* send nzlocal to others, and recv other's nzlocal */ 2626 /*--------------------------------------------------*/ 2627 if (reuse == MAT_INITIAL_MATRIX){ 2628 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2629 s_waits2 = s_waits3 + nsends; 2630 s_waits1 = s_waits2 + nsends; 2631 r_waits1 = s_waits1 + nsends; 2632 r_waits2 = r_waits1 + nrecvs; 2633 r_waits3 = r_waits2 + nrecvs; 2634 } else { 2635 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2636 r_waits3 = s_waits3 + nsends; 2637 } 2638 2639 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2640 if (reuse == MAT_INITIAL_MATRIX){ 2641 /* get new tags to keep the communication clean */ 2642 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2644 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2645 2646 /* post receives of other's nzlocal */ 2647 for (i=0; i<nrecvs; i++){ 2648 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2649 } 2650 /* send nzlocal to others */ 2651 for (i=0; i<nsends; i++){ 2652 sbuf_nz[i] = nzlocal; 2653 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2654 } 2655 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2656 count = nrecvs; 2657 while (count) { 2658 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2659 recv_rank[imdex] = recv_status.MPI_SOURCE; 2660 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2661 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2662 2663 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2664 rbuf_nz[imdex] += i + 2; 2665 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2666 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2667 count--; 2668 } 2669 /* wait on sends of nzlocal */ 2670 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2671 /* send mat->i,j to others, and recv from other's */ 2672 /*------------------------------------------------*/ 2673 for (i=0; i<nsends; i++){ 2674 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2675 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2676 } 2677 /* wait on receives of mat->i,j */ 2678 /*------------------------------*/ 2679 count = nrecvs; 2680 while (count) { 2681 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2682 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2683 count--; 2684 } 2685 /* wait on sends of mat->i,j */ 2686 /*---------------------------*/ 2687 if (nsends) { 2688 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2689 } 2690 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2691 2692 /* post receives, send and receive mat->a */ 2693 /*----------------------------------------*/ 2694 for (imdex=0; imdex<nrecvs; imdex++) { 2695 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2696 } 2697 for (i=0; i<nsends; i++){ 2698 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2699 } 2700 count = nrecvs; 2701 while (count) { 2702 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2703 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2704 count--; 2705 } 2706 if (nsends) { 2707 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2708 } 2709 2710 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2711 2712 /* create redundant matrix */ 2713 /*-------------------------*/ 2714 if (reuse == MAT_INITIAL_MATRIX){ 2715 /* compute rownz_max for preallocation */ 2716 for (imdex=0; imdex<nrecvs; imdex++){ 2717 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2718 rptr = rbuf_j[imdex]; 2719 for (i=0; i<j; i++){ 2720 ncols = rptr[i+1] - rptr[i]; 2721 if (rownz_max < ncols) rownz_max = ncols; 2722 } 2723 } 2724 2725 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2726 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2727 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2728 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2729 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2730 } else { 2731 C = *matredundant; 2732 } 2733 2734 /* insert local matrix entries */ 2735 rptr = sbuf_j; 2736 cols = sbuf_j + rend-rstart + 1; 2737 vals = sbuf_a; 2738 for (i=0; i<rend-rstart; i++){ 2739 row = i + rstart; 2740 ncols = rptr[i+1] - rptr[i]; 2741 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2742 vals += ncols; 2743 cols += ncols; 2744 } 2745 /* insert received matrix entries */ 2746 for (imdex=0; imdex<nrecvs; imdex++){ 2747 rstart = rowrange[recv_rank[imdex]]; 2748 rend = rowrange[recv_rank[imdex]+1]; 2749 rptr = rbuf_j[imdex]; 2750 cols = rbuf_j[imdex] + rend-rstart + 1; 2751 vals = rbuf_a[imdex]; 2752 for (i=0; i<rend-rstart; i++){ 2753 row = i + rstart; 2754 ncols = rptr[i+1] - rptr[i]; 2755 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2756 vals += ncols; 2757 cols += ncols; 2758 } 2759 } 2760 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2761 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2762 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2763 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2764 if (reuse == MAT_INITIAL_MATRIX) { 2765 PetscContainer container; 2766 *matredundant = C; 2767 /* create a supporting struct and attach it to C for reuse */ 2768 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2769 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2770 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2771 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2772 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2773 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2774 2775 redund->nzlocal = nzlocal; 2776 redund->nsends = nsends; 2777 redund->nrecvs = nrecvs; 2778 redund->send_rank = send_rank; 2779 redund->recv_rank = recv_rank; 2780 redund->sbuf_nz = sbuf_nz; 2781 redund->rbuf_nz = rbuf_nz; 2782 redund->sbuf_j = sbuf_j; 2783 redund->sbuf_a = sbuf_a; 2784 redund->rbuf_j = rbuf_j; 2785 redund->rbuf_a = rbuf_a; 2786 2787 redund->Destroy = C->ops->destroy; 2788 C->ops->destroy = MatDestroy_MatRedundant; 2789 } 2790 PetscFunctionReturn(0); 2791 } 2792 2793 #undef __FUNCT__ 2794 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2795 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2796 { 2797 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2798 PetscErrorCode ierr; 2799 PetscInt i,*idxb = 0; 2800 PetscScalar *va,*vb; 2801 Vec vtmp; 2802 2803 PetscFunctionBegin; 2804 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2805 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2806 if (idx) { 2807 for (i=0; i<A->rmap->n; i++) { 2808 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2809 } 2810 } 2811 2812 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2813 if (idx) { 2814 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2815 } 2816 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2817 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2818 2819 for (i=0; i<A->rmap->n; i++){ 2820 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2821 va[i] = vb[i]; 2822 if (idx) idx[i] = a->garray[idxb[i]]; 2823 } 2824 } 2825 2826 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2827 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2828 ierr = PetscFree(idxb);CHKERRQ(ierr); 2829 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2830 PetscFunctionReturn(0); 2831 } 2832 2833 #undef __FUNCT__ 2834 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2835 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2836 { 2837 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2838 PetscErrorCode ierr; 2839 PetscInt i,*idxb = 0; 2840 PetscScalar *va,*vb; 2841 Vec vtmp; 2842 2843 PetscFunctionBegin; 2844 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2845 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2846 if (idx) { 2847 for (i=0; i<A->cmap->n; i++) { 2848 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2849 } 2850 } 2851 2852 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2853 if (idx) { 2854 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2855 } 2856 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2857 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2858 2859 for (i=0; i<A->rmap->n; i++){ 2860 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2861 va[i] = vb[i]; 2862 if (idx) idx[i] = a->garray[idxb[i]]; 2863 } 2864 } 2865 2866 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2867 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2868 ierr = PetscFree(idxb);CHKERRQ(ierr); 2869 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2870 PetscFunctionReturn(0); 2871 } 2872 2873 #undef __FUNCT__ 2874 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2875 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2876 { 2877 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2878 PetscInt n = A->rmap->n; 2879 PetscInt cstart = A->cmap->rstart; 2880 PetscInt *cmap = mat->garray; 2881 PetscInt *diagIdx, *offdiagIdx; 2882 Vec diagV, offdiagV; 2883 PetscScalar *a, *diagA, *offdiagA; 2884 PetscInt r; 2885 PetscErrorCode ierr; 2886 2887 PetscFunctionBegin; 2888 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2889 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2890 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2891 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2892 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2893 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2894 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2895 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2896 for(r = 0; r < n; ++r) { 2897 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2898 a[r] = diagA[r]; 2899 idx[r] = cstart + diagIdx[r]; 2900 } else { 2901 a[r] = offdiagA[r]; 2902 idx[r] = cmap[offdiagIdx[r]]; 2903 } 2904 } 2905 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2906 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2907 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2908 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2909 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2910 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2911 PetscFunctionReturn(0); 2912 } 2913 2914 #undef __FUNCT__ 2915 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2916 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2917 { 2918 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2919 PetscInt n = A->rmap->n; 2920 PetscInt cstart = A->cmap->rstart; 2921 PetscInt *cmap = mat->garray; 2922 PetscInt *diagIdx, *offdiagIdx; 2923 Vec diagV, offdiagV; 2924 PetscScalar *a, *diagA, *offdiagA; 2925 PetscInt r; 2926 PetscErrorCode ierr; 2927 2928 PetscFunctionBegin; 2929 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2930 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2931 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2932 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2933 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2934 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2935 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2936 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2937 for(r = 0; r < n; ++r) { 2938 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2939 a[r] = diagA[r]; 2940 idx[r] = cstart + diagIdx[r]; 2941 } else { 2942 a[r] = offdiagA[r]; 2943 idx[r] = cmap[offdiagIdx[r]]; 2944 } 2945 } 2946 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2947 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2948 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2949 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2950 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2951 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2952 PetscFunctionReturn(0); 2953 } 2954 2955 #undef __FUNCT__ 2956 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2957 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2958 { 2959 PetscErrorCode ierr; 2960 Mat *dummy; 2961 2962 PetscFunctionBegin; 2963 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2964 *newmat = *dummy; 2965 ierr = PetscFree(dummy);CHKERRQ(ierr); 2966 PetscFunctionReturn(0); 2967 } 2968 2969 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2970 /* -------------------------------------------------------------------*/ 2971 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2972 MatGetRow_MPIAIJ, 2973 MatRestoreRow_MPIAIJ, 2974 MatMult_MPIAIJ, 2975 /* 4*/ MatMultAdd_MPIAIJ, 2976 MatMultTranspose_MPIAIJ, 2977 MatMultTransposeAdd_MPIAIJ, 2978 #ifdef PETSC_HAVE_PBGL 2979 MatSolve_MPIAIJ, 2980 #else 2981 0, 2982 #endif 2983 0, 2984 0, 2985 /*10*/ 0, 2986 0, 2987 0, 2988 MatSOR_MPIAIJ, 2989 MatTranspose_MPIAIJ, 2990 /*15*/ MatGetInfo_MPIAIJ, 2991 MatEqual_MPIAIJ, 2992 MatGetDiagonal_MPIAIJ, 2993 MatDiagonalScale_MPIAIJ, 2994 MatNorm_MPIAIJ, 2995 /*20*/ MatAssemblyBegin_MPIAIJ, 2996 MatAssemblyEnd_MPIAIJ, 2997 MatSetOption_MPIAIJ, 2998 MatZeroEntries_MPIAIJ, 2999 /*24*/ MatZeroRows_MPIAIJ, 3000 0, 3001 #ifdef PETSC_HAVE_PBGL 3002 0, 3003 #else 3004 0, 3005 #endif 3006 0, 3007 0, 3008 /*29*/ MatSetUpPreallocation_MPIAIJ, 3009 #ifdef PETSC_HAVE_PBGL 3010 0, 3011 #else 3012 0, 3013 #endif 3014 0, 3015 0, 3016 0, 3017 /*34*/ MatDuplicate_MPIAIJ, 3018 0, 3019 0, 3020 0, 3021 0, 3022 /*39*/ MatAXPY_MPIAIJ, 3023 MatGetSubMatrices_MPIAIJ, 3024 MatIncreaseOverlap_MPIAIJ, 3025 MatGetValues_MPIAIJ, 3026 MatCopy_MPIAIJ, 3027 /*44*/ MatGetRowMax_MPIAIJ, 3028 MatScale_MPIAIJ, 3029 0, 3030 0, 3031 MatZeroRowsColumns_MPIAIJ, 3032 /*49*/ MatSetBlockSize_MPIAIJ, 3033 0, 3034 0, 3035 0, 3036 0, 3037 /*54*/ MatFDColoringCreate_MPIAIJ, 3038 0, 3039 MatSetUnfactored_MPIAIJ, 3040 MatPermute_MPIAIJ, 3041 0, 3042 /*59*/ MatGetSubMatrix_MPIAIJ, 3043 MatDestroy_MPIAIJ, 3044 MatView_MPIAIJ, 3045 0, 3046 0, 3047 /*64*/ 0, 3048 0, 3049 0, 3050 0, 3051 0, 3052 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3053 MatGetRowMinAbs_MPIAIJ, 3054 0, 3055 MatSetColoring_MPIAIJ, 3056 #if defined(PETSC_HAVE_ADIC) 3057 MatSetValuesAdic_MPIAIJ, 3058 #else 3059 0, 3060 #endif 3061 MatSetValuesAdifor_MPIAIJ, 3062 /*75*/ MatFDColoringApply_AIJ, 3063 0, 3064 0, 3065 0, 3066 0, 3067 /*80*/ 0, 3068 0, 3069 0, 3070 /*83*/ MatLoad_MPIAIJ, 3071 0, 3072 0, 3073 0, 3074 0, 3075 0, 3076 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3077 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3078 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3079 MatPtAP_Basic, 3080 MatPtAPSymbolic_MPIAIJ, 3081 /*94*/ MatPtAPNumeric_MPIAIJ, 3082 0, 3083 0, 3084 0, 3085 0, 3086 /*99*/ 0, 3087 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3088 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3089 MatConjugate_MPIAIJ, 3090 0, 3091 /*104*/MatSetValuesRow_MPIAIJ, 3092 MatRealPart_MPIAIJ, 3093 MatImaginaryPart_MPIAIJ, 3094 0, 3095 0, 3096 /*109*/0, 3097 MatGetRedundantMatrix_MPIAIJ, 3098 MatGetRowMin_MPIAIJ, 3099 0, 3100 0, 3101 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 3102 0, 3103 0, 3104 0, 3105 0, 3106 /*119*/0, 3107 0, 3108 0, 3109 0, 3110 MatGetMultiProcBlock_MPIAIJ, 3111 /*124*/MatFindNonZeroRows_MPIAIJ, 3112 MatGetColumnNorms_MPIAIJ, 3113 0, 3114 0, 3115 MatGetSubMatricesParallel_MPIAIJ 3116 }; 3117 3118 /* ----------------------------------------------------------------------------------------*/ 3119 3120 EXTERN_C_BEGIN 3121 #undef __FUNCT__ 3122 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3123 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3124 { 3125 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3126 PetscErrorCode ierr; 3127 3128 PetscFunctionBegin; 3129 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3130 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3131 PetscFunctionReturn(0); 3132 } 3133 EXTERN_C_END 3134 3135 EXTERN_C_BEGIN 3136 #undef __FUNCT__ 3137 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3138 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3139 { 3140 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3141 PetscErrorCode ierr; 3142 3143 PetscFunctionBegin; 3144 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3145 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3146 PetscFunctionReturn(0); 3147 } 3148 EXTERN_C_END 3149 3150 EXTERN_C_BEGIN 3151 #undef __FUNCT__ 3152 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3153 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3154 { 3155 Mat_MPIAIJ *b; 3156 PetscErrorCode ierr; 3157 PetscInt i; 3158 3159 PetscFunctionBegin; 3160 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3161 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3162 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3163 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3164 3165 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3166 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3167 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3168 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3169 if (d_nnz) { 3170 for (i=0; i<B->rmap->n; i++) { 3171 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3172 } 3173 } 3174 if (o_nnz) { 3175 for (i=0; i<B->rmap->n; i++) { 3176 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3177 } 3178 } 3179 b = (Mat_MPIAIJ*)B->data; 3180 3181 if (!B->preallocated) { 3182 /* Explicitly create 2 MATSEQAIJ matrices. */ 3183 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3184 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3185 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3186 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3187 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3188 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3189 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3190 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3191 } 3192 3193 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3194 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3195 B->preallocated = PETSC_TRUE; 3196 PetscFunctionReturn(0); 3197 } 3198 EXTERN_C_END 3199 3200 #undef __FUNCT__ 3201 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3202 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3203 { 3204 Mat mat; 3205 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3206 PetscErrorCode ierr; 3207 3208 PetscFunctionBegin; 3209 *newmat = 0; 3210 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3211 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3212 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3213 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3214 a = (Mat_MPIAIJ*)mat->data; 3215 3216 mat->factortype = matin->factortype; 3217 mat->rmap->bs = matin->rmap->bs; 3218 mat->assembled = PETSC_TRUE; 3219 mat->insertmode = NOT_SET_VALUES; 3220 mat->preallocated = PETSC_TRUE; 3221 3222 a->size = oldmat->size; 3223 a->rank = oldmat->rank; 3224 a->donotstash = oldmat->donotstash; 3225 a->roworiented = oldmat->roworiented; 3226 a->rowindices = 0; 3227 a->rowvalues = 0; 3228 a->getrowactive = PETSC_FALSE; 3229 3230 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3231 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3232 3233 if (oldmat->colmap) { 3234 #if defined (PETSC_USE_CTABLE) 3235 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3236 #else 3237 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3238 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3239 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3240 #endif 3241 } else a->colmap = 0; 3242 if (oldmat->garray) { 3243 PetscInt len; 3244 len = oldmat->B->cmap->n; 3245 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3246 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3247 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3248 } else a->garray = 0; 3249 3250 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3251 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3252 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3253 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3254 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3255 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3256 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3257 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3258 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3259 *newmat = mat; 3260 PetscFunctionReturn(0); 3261 } 3262 3263 /* 3264 Allows sending/receiving larger messages then 2 gigabytes in a single call 3265 */ 3266 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 3267 { 3268 int ierr; 3269 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3270 PetscInt i,numchunks; 3271 PetscMPIInt icnt; 3272 3273 numchunks = cnt/CHUNKSIZE + 1; 3274 for (i=0; i<numchunks; i++) { 3275 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3276 ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr; 3277 if (type == MPIU_INT) { 3278 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3279 } else if (type == MPIU_SCALAR) { 3280 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3281 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3282 } 3283 return 0; 3284 } 3285 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 3286 { 3287 int ierr; 3288 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3289 MPI_Status status; 3290 PetscInt i,numchunks; 3291 PetscMPIInt icnt; 3292 3293 numchunks = cnt/CHUNKSIZE + 1; 3294 for (i=0; i<numchunks; i++) { 3295 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3296 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr; 3297 if (type == MPIU_INT) { 3298 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3299 } else if (type == MPIU_SCALAR) { 3300 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3301 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3302 } 3303 return 0; 3304 } 3305 3306 #undef __FUNCT__ 3307 #define __FUNCT__ "MatLoad_MPIAIJ" 3308 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3309 { 3310 PetscScalar *vals,*svals; 3311 MPI_Comm comm = ((PetscObject)viewer)->comm; 3312 PetscErrorCode ierr; 3313 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3314 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3315 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3316 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3317 PetscInt cend,cstart,n,*rowners,sizesset=1; 3318 int fd; 3319 3320 PetscFunctionBegin; 3321 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3322 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3323 if (!rank) { 3324 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3325 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3326 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3327 } 3328 3329 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3330 3331 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3332 M = header[1]; N = header[2]; 3333 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3334 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3335 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3336 3337 /* If global sizes are set, check if they are consistent with that given in the file */ 3338 if (sizesset) { 3339 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3340 } 3341 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3342 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3343 3344 /* determine ownership of all rows */ 3345 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3346 else m = newMat->rmap->n; /* Set by user */ 3347 3348 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3349 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3350 3351 /* First process needs enough room for process with most rows */ 3352 if (!rank) { 3353 mmax = rowners[1]; 3354 for (i=2; i<size; i++) { 3355 mmax = PetscMax(mmax,rowners[i]); 3356 } 3357 } else mmax = m; 3358 3359 rowners[0] = 0; 3360 for (i=2; i<=size; i++) { 3361 rowners[i] += rowners[i-1]; 3362 } 3363 rstart = rowners[rank]; 3364 rend = rowners[rank+1]; 3365 3366 /* distribute row lengths to all processors */ 3367 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3368 if (!rank) { 3369 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3370 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3371 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3372 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3373 for (j=0; j<m; j++) { 3374 procsnz[0] += ourlens[j]; 3375 } 3376 for (i=1; i<size; i++) { 3377 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3378 /* calculate the number of nonzeros on each processor */ 3379 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3380 procsnz[i] += rowlengths[j]; 3381 } 3382 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3383 } 3384 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3385 } else { 3386 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3387 } 3388 3389 if (!rank) { 3390 /* determine max buffer needed and allocate it */ 3391 maxnz = 0; 3392 for (i=0; i<size; i++) { 3393 maxnz = PetscMax(maxnz,procsnz[i]); 3394 } 3395 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3396 3397 /* read in my part of the matrix column indices */ 3398 nz = procsnz[0]; 3399 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3400 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3401 3402 /* read in every one elses and ship off */ 3403 for (i=1; i<size; i++) { 3404 nz = procsnz[i]; 3405 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3406 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3407 } 3408 ierr = PetscFree(cols);CHKERRQ(ierr); 3409 } else { 3410 /* determine buffer space needed for message */ 3411 nz = 0; 3412 for (i=0; i<m; i++) { 3413 nz += ourlens[i]; 3414 } 3415 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3416 3417 /* receive message of column indices*/ 3418 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3419 } 3420 3421 /* determine column ownership if matrix is not square */ 3422 if (N != M) { 3423 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3424 else n = newMat->cmap->n; 3425 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3426 cstart = cend - n; 3427 } else { 3428 cstart = rstart; 3429 cend = rend; 3430 n = cend - cstart; 3431 } 3432 3433 /* loop over local rows, determining number of off diagonal entries */ 3434 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3435 jj = 0; 3436 for (i=0; i<m; i++) { 3437 for (j=0; j<ourlens[i]; j++) { 3438 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3439 jj++; 3440 } 3441 } 3442 3443 for (i=0; i<m; i++) { 3444 ourlens[i] -= offlens[i]; 3445 } 3446 if (!sizesset) { 3447 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3448 } 3449 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3450 3451 for (i=0; i<m; i++) { 3452 ourlens[i] += offlens[i]; 3453 } 3454 3455 if (!rank) { 3456 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3457 3458 /* read in my part of the matrix numerical values */ 3459 nz = procsnz[0]; 3460 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3461 3462 /* insert into matrix */ 3463 jj = rstart; 3464 smycols = mycols; 3465 svals = vals; 3466 for (i=0; i<m; i++) { 3467 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3468 smycols += ourlens[i]; 3469 svals += ourlens[i]; 3470 jj++; 3471 } 3472 3473 /* read in other processors and ship out */ 3474 for (i=1; i<size; i++) { 3475 nz = procsnz[i]; 3476 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3477 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3478 } 3479 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3480 } else { 3481 /* receive numeric values */ 3482 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3483 3484 /* receive message of values*/ 3485 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3486 3487 /* insert into matrix */ 3488 jj = rstart; 3489 smycols = mycols; 3490 svals = vals; 3491 for (i=0; i<m; i++) { 3492 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3493 smycols += ourlens[i]; 3494 svals += ourlens[i]; 3495 jj++; 3496 } 3497 } 3498 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3499 ierr = PetscFree(vals);CHKERRQ(ierr); 3500 ierr = PetscFree(mycols);CHKERRQ(ierr); 3501 ierr = PetscFree(rowners);CHKERRQ(ierr); 3502 3503 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3504 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3505 PetscFunctionReturn(0); 3506 } 3507 3508 #undef __FUNCT__ 3509 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3510 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3511 { 3512 PetscErrorCode ierr; 3513 IS iscol_local; 3514 PetscInt csize; 3515 3516 PetscFunctionBegin; 3517 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3518 if (call == MAT_REUSE_MATRIX) { 3519 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3520 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3521 } else { 3522 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3523 } 3524 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3525 if (call == MAT_INITIAL_MATRIX) { 3526 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3527 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3528 } 3529 PetscFunctionReturn(0); 3530 } 3531 3532 #undef __FUNCT__ 3533 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3534 /* 3535 Not great since it makes two copies of the submatrix, first an SeqAIJ 3536 in local and then by concatenating the local matrices the end result. 3537 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3538 3539 Note: This requires a sequential iscol with all indices. 3540 */ 3541 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3542 { 3543 PetscErrorCode ierr; 3544 PetscMPIInt rank,size; 3545 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3546 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3547 Mat *local,M,Mreuse; 3548 MatScalar *vwork,*aa; 3549 MPI_Comm comm = ((PetscObject)mat)->comm; 3550 Mat_SeqAIJ *aij; 3551 3552 3553 PetscFunctionBegin; 3554 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3555 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3556 3557 if (call == MAT_REUSE_MATRIX) { 3558 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3559 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3560 local = &Mreuse; 3561 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3562 } else { 3563 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3564 Mreuse = *local; 3565 ierr = PetscFree(local);CHKERRQ(ierr); 3566 } 3567 3568 /* 3569 m - number of local rows 3570 n - number of columns (same on all processors) 3571 rstart - first row in new global matrix generated 3572 */ 3573 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3574 if (call == MAT_INITIAL_MATRIX) { 3575 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3576 ii = aij->i; 3577 jj = aij->j; 3578 3579 /* 3580 Determine the number of non-zeros in the diagonal and off-diagonal 3581 portions of the matrix in order to do correct preallocation 3582 */ 3583 3584 /* first get start and end of "diagonal" columns */ 3585 if (csize == PETSC_DECIDE) { 3586 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3587 if (mglobal == n) { /* square matrix */ 3588 nlocal = m; 3589 } else { 3590 nlocal = n/size + ((n % size) > rank); 3591 } 3592 } else { 3593 nlocal = csize; 3594 } 3595 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3596 rstart = rend - nlocal; 3597 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3598 3599 /* next, compute all the lengths */ 3600 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3601 olens = dlens + m; 3602 for (i=0; i<m; i++) { 3603 jend = ii[i+1] - ii[i]; 3604 olen = 0; 3605 dlen = 0; 3606 for (j=0; j<jend; j++) { 3607 if (*jj < rstart || *jj >= rend) olen++; 3608 else dlen++; 3609 jj++; 3610 } 3611 olens[i] = olen; 3612 dlens[i] = dlen; 3613 } 3614 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3615 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3616 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3617 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3618 ierr = PetscFree(dlens);CHKERRQ(ierr); 3619 } else { 3620 PetscInt ml,nl; 3621 3622 M = *newmat; 3623 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3624 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3625 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3626 /* 3627 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3628 rather than the slower MatSetValues(). 3629 */ 3630 M->was_assembled = PETSC_TRUE; 3631 M->assembled = PETSC_FALSE; 3632 } 3633 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3634 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3635 ii = aij->i; 3636 jj = aij->j; 3637 aa = aij->a; 3638 for (i=0; i<m; i++) { 3639 row = rstart + i; 3640 nz = ii[i+1] - ii[i]; 3641 cwork = jj; jj += nz; 3642 vwork = aa; aa += nz; 3643 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3644 } 3645 3646 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3647 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3648 *newmat = M; 3649 3650 /* save submatrix used in processor for next request */ 3651 if (call == MAT_INITIAL_MATRIX) { 3652 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3653 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3654 } 3655 3656 PetscFunctionReturn(0); 3657 } 3658 3659 EXTERN_C_BEGIN 3660 #undef __FUNCT__ 3661 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3662 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3663 { 3664 PetscInt m,cstart, cend,j,nnz,i,d; 3665 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3666 const PetscInt *JJ; 3667 PetscScalar *values; 3668 PetscErrorCode ierr; 3669 3670 PetscFunctionBegin; 3671 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3672 3673 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3674 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3675 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3676 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3677 m = B->rmap->n; 3678 cstart = B->cmap->rstart; 3679 cend = B->cmap->rend; 3680 rstart = B->rmap->rstart; 3681 3682 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3683 3684 #if defined(PETSC_USE_DEBUGGING) 3685 for (i=0; i<m; i++) { 3686 nnz = Ii[i+1]- Ii[i]; 3687 JJ = J + Ii[i]; 3688 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3689 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3690 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3691 } 3692 #endif 3693 3694 for (i=0; i<m; i++) { 3695 nnz = Ii[i+1]- Ii[i]; 3696 JJ = J + Ii[i]; 3697 nnz_max = PetscMax(nnz_max,nnz); 3698 d = 0; 3699 for (j=0; j<nnz; j++) { 3700 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3701 } 3702 d_nnz[i] = d; 3703 o_nnz[i] = nnz - d; 3704 } 3705 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3706 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3707 3708 if (v) values = (PetscScalar*)v; 3709 else { 3710 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3711 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3712 } 3713 3714 for (i=0; i<m; i++) { 3715 ii = i + rstart; 3716 nnz = Ii[i+1]- Ii[i]; 3717 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3718 } 3719 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3720 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3721 3722 if (!v) { 3723 ierr = PetscFree(values);CHKERRQ(ierr); 3724 } 3725 PetscFunctionReturn(0); 3726 } 3727 EXTERN_C_END 3728 3729 #undef __FUNCT__ 3730 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3731 /*@ 3732 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3733 (the default parallel PETSc format). 3734 3735 Collective on MPI_Comm 3736 3737 Input Parameters: 3738 + B - the matrix 3739 . i - the indices into j for the start of each local row (starts with zero) 3740 . j - the column indices for each local row (starts with zero) 3741 - v - optional values in the matrix 3742 3743 Level: developer 3744 3745 Notes: 3746 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3747 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3748 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3749 3750 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3751 3752 The format which is used for the sparse matrix input, is equivalent to a 3753 row-major ordering.. i.e for the following matrix, the input data expected is 3754 as shown: 3755 3756 1 0 0 3757 2 0 3 P0 3758 ------- 3759 4 5 6 P1 3760 3761 Process0 [P0]: rows_owned=[0,1] 3762 i = {0,1,3} [size = nrow+1 = 2+1] 3763 j = {0,0,2} [size = nz = 6] 3764 v = {1,2,3} [size = nz = 6] 3765 3766 Process1 [P1]: rows_owned=[2] 3767 i = {0,3} [size = nrow+1 = 1+1] 3768 j = {0,1,2} [size = nz = 6] 3769 v = {4,5,6} [size = nz = 6] 3770 3771 .keywords: matrix, aij, compressed row, sparse, parallel 3772 3773 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3774 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3775 @*/ 3776 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3777 { 3778 PetscErrorCode ierr; 3779 3780 PetscFunctionBegin; 3781 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3782 PetscFunctionReturn(0); 3783 } 3784 3785 #undef __FUNCT__ 3786 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3787 /*@C 3788 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3789 (the default parallel PETSc format). For good matrix assembly performance 3790 the user should preallocate the matrix storage by setting the parameters 3791 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3792 performance can be increased by more than a factor of 50. 3793 3794 Collective on MPI_Comm 3795 3796 Input Parameters: 3797 + A - the matrix 3798 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3799 (same value is used for all local rows) 3800 . d_nnz - array containing the number of nonzeros in the various rows of the 3801 DIAGONAL portion of the local submatrix (possibly different for each row) 3802 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3803 The size of this array is equal to the number of local rows, i.e 'm'. 3804 You must leave room for the diagonal entry even if it is zero. 3805 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3806 submatrix (same value is used for all local rows). 3807 - o_nnz - array containing the number of nonzeros in the various rows of the 3808 OFF-DIAGONAL portion of the local submatrix (possibly different for 3809 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3810 structure. The size of this array is equal to the number 3811 of local rows, i.e 'm'. 3812 3813 If the *_nnz parameter is given then the *_nz parameter is ignored 3814 3815 The AIJ format (also called the Yale sparse matrix format or 3816 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3817 storage. The stored row and column indices begin with zero. 3818 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3819 3820 The parallel matrix is partitioned such that the first m0 rows belong to 3821 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3822 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3823 3824 The DIAGONAL portion of the local submatrix of a processor can be defined 3825 as the submatrix which is obtained by extraction the part corresponding to 3826 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3827 first row that belongs to the processor, r2 is the last row belonging to 3828 the this processor, and c1-c2 is range of indices of the local part of a 3829 vector suitable for applying the matrix to. This is an mxn matrix. In the 3830 common case of a square matrix, the row and column ranges are the same and 3831 the DIAGONAL part is also square. The remaining portion of the local 3832 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3833 3834 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3835 3836 You can call MatGetInfo() to get information on how effective the preallocation was; 3837 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3838 You can also run with the option -info and look for messages with the string 3839 malloc in them to see if additional memory allocation was needed. 3840 3841 Example usage: 3842 3843 Consider the following 8x8 matrix with 34 non-zero values, that is 3844 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3845 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3846 as follows: 3847 3848 .vb 3849 1 2 0 | 0 3 0 | 0 4 3850 Proc0 0 5 6 | 7 0 0 | 8 0 3851 9 0 10 | 11 0 0 | 12 0 3852 ------------------------------------- 3853 13 0 14 | 15 16 17 | 0 0 3854 Proc1 0 18 0 | 19 20 21 | 0 0 3855 0 0 0 | 22 23 0 | 24 0 3856 ------------------------------------- 3857 Proc2 25 26 27 | 0 0 28 | 29 0 3858 30 0 0 | 31 32 33 | 0 34 3859 .ve 3860 3861 This can be represented as a collection of submatrices as: 3862 3863 .vb 3864 A B C 3865 D E F 3866 G H I 3867 .ve 3868 3869 Where the submatrices A,B,C are owned by proc0, D,E,F are 3870 owned by proc1, G,H,I are owned by proc2. 3871 3872 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3873 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3874 The 'M','N' parameters are 8,8, and have the same values on all procs. 3875 3876 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3877 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3878 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3879 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3880 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3881 matrix, ans [DF] as another SeqAIJ matrix. 3882 3883 When d_nz, o_nz parameters are specified, d_nz storage elements are 3884 allocated for every row of the local diagonal submatrix, and o_nz 3885 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3886 One way to choose d_nz and o_nz is to use the max nonzerors per local 3887 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3888 In this case, the values of d_nz,o_nz are: 3889 .vb 3890 proc0 : dnz = 2, o_nz = 2 3891 proc1 : dnz = 3, o_nz = 2 3892 proc2 : dnz = 1, o_nz = 4 3893 .ve 3894 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3895 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3896 for proc3. i.e we are using 12+15+10=37 storage locations to store 3897 34 values. 3898 3899 When d_nnz, o_nnz parameters are specified, the storage is specified 3900 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3901 In the above case the values for d_nnz,o_nnz are: 3902 .vb 3903 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3904 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3905 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3906 .ve 3907 Here the space allocated is sum of all the above values i.e 34, and 3908 hence pre-allocation is perfect. 3909 3910 Level: intermediate 3911 3912 .keywords: matrix, aij, compressed row, sparse, parallel 3913 3914 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3915 MPIAIJ, MatGetInfo() 3916 @*/ 3917 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3918 { 3919 PetscErrorCode ierr; 3920 3921 PetscFunctionBegin; 3922 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3923 PetscFunctionReturn(0); 3924 } 3925 3926 #undef __FUNCT__ 3927 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3928 /*@ 3929 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3930 CSR format the local rows. 3931 3932 Collective on MPI_Comm 3933 3934 Input Parameters: 3935 + comm - MPI communicator 3936 . m - number of local rows (Cannot be PETSC_DECIDE) 3937 . n - This value should be the same as the local size used in creating the 3938 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3939 calculated if N is given) For square matrices n is almost always m. 3940 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3941 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3942 . i - row indices 3943 . j - column indices 3944 - a - matrix values 3945 3946 Output Parameter: 3947 . mat - the matrix 3948 3949 Level: intermediate 3950 3951 Notes: 3952 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3953 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3954 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3955 3956 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3957 3958 The format which is used for the sparse matrix input, is equivalent to a 3959 row-major ordering.. i.e for the following matrix, the input data expected is 3960 as shown: 3961 3962 1 0 0 3963 2 0 3 P0 3964 ------- 3965 4 5 6 P1 3966 3967 Process0 [P0]: rows_owned=[0,1] 3968 i = {0,1,3} [size = nrow+1 = 2+1] 3969 j = {0,0,2} [size = nz = 6] 3970 v = {1,2,3} [size = nz = 6] 3971 3972 Process1 [P1]: rows_owned=[2] 3973 i = {0,3} [size = nrow+1 = 1+1] 3974 j = {0,1,2} [size = nz = 6] 3975 v = {4,5,6} [size = nz = 6] 3976 3977 .keywords: matrix, aij, compressed row, sparse, parallel 3978 3979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3980 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3981 @*/ 3982 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3983 { 3984 PetscErrorCode ierr; 3985 3986 PetscFunctionBegin; 3987 if (i[0]) { 3988 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3989 } 3990 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3991 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3992 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3993 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3994 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3995 PetscFunctionReturn(0); 3996 } 3997 3998 #undef __FUNCT__ 3999 #define __FUNCT__ "MatCreateMPIAIJ" 4000 /*@C 4001 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 4002 (the default parallel PETSc format). For good matrix assembly performance 4003 the user should preallocate the matrix storage by setting the parameters 4004 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4005 performance can be increased by more than a factor of 50. 4006 4007 Collective on MPI_Comm 4008 4009 Input Parameters: 4010 + comm - MPI communicator 4011 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4012 This value should be the same as the local size used in creating the 4013 y vector for the matrix-vector product y = Ax. 4014 . n - This value should be the same as the local size used in creating the 4015 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4016 calculated if N is given) For square matrices n is almost always m. 4017 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4018 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4019 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4020 (same value is used for all local rows) 4021 . d_nnz - array containing the number of nonzeros in the various rows of the 4022 DIAGONAL portion of the local submatrix (possibly different for each row) 4023 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4024 The size of this array is equal to the number of local rows, i.e 'm'. 4025 You must leave room for the diagonal entry even if it is zero. 4026 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4027 submatrix (same value is used for all local rows). 4028 - o_nnz - array containing the number of nonzeros in the various rows of the 4029 OFF-DIAGONAL portion of the local submatrix (possibly different for 4030 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4031 structure. The size of this array is equal to the number 4032 of local rows, i.e 'm'. 4033 4034 Output Parameter: 4035 . A - the matrix 4036 4037 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4038 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4039 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4040 4041 Notes: 4042 If the *_nnz parameter is given then the *_nz parameter is ignored 4043 4044 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4045 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4046 storage requirements for this matrix. 4047 4048 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4049 processor than it must be used on all processors that share the object for 4050 that argument. 4051 4052 The user MUST specify either the local or global matrix dimensions 4053 (possibly both). 4054 4055 The parallel matrix is partitioned across processors such that the 4056 first m0 rows belong to process 0, the next m1 rows belong to 4057 process 1, the next m2 rows belong to process 2 etc.. where 4058 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4059 values corresponding to [m x N] submatrix. 4060 4061 The columns are logically partitioned with the n0 columns belonging 4062 to 0th partition, the next n1 columns belonging to the next 4063 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4064 4065 The DIAGONAL portion of the local submatrix on any given processor 4066 is the submatrix corresponding to the rows and columns m,n 4067 corresponding to the given processor. i.e diagonal matrix on 4068 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4069 etc. The remaining portion of the local submatrix [m x (N-n)] 4070 constitute the OFF-DIAGONAL portion. The example below better 4071 illustrates this concept. 4072 4073 For a square global matrix we define each processor's diagonal portion 4074 to be its local rows and the corresponding columns (a square submatrix); 4075 each processor's off-diagonal portion encompasses the remainder of the 4076 local matrix (a rectangular submatrix). 4077 4078 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4079 4080 When calling this routine with a single process communicator, a matrix of 4081 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4082 type of communicator, use the construction mechanism: 4083 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4084 4085 By default, this format uses inodes (identical nodes) when possible. 4086 We search for consecutive rows with the same nonzero structure, thereby 4087 reusing matrix information to achieve increased efficiency. 4088 4089 Options Database Keys: 4090 + -mat_no_inode - Do not use inodes 4091 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4092 - -mat_aij_oneindex - Internally use indexing starting at 1 4093 rather than 0. Note that when calling MatSetValues(), 4094 the user still MUST index entries starting at 0! 4095 4096 4097 Example usage: 4098 4099 Consider the following 8x8 matrix with 34 non-zero values, that is 4100 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4101 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4102 as follows: 4103 4104 .vb 4105 1 2 0 | 0 3 0 | 0 4 4106 Proc0 0 5 6 | 7 0 0 | 8 0 4107 9 0 10 | 11 0 0 | 12 0 4108 ------------------------------------- 4109 13 0 14 | 15 16 17 | 0 0 4110 Proc1 0 18 0 | 19 20 21 | 0 0 4111 0 0 0 | 22 23 0 | 24 0 4112 ------------------------------------- 4113 Proc2 25 26 27 | 0 0 28 | 29 0 4114 30 0 0 | 31 32 33 | 0 34 4115 .ve 4116 4117 This can be represented as a collection of submatrices as: 4118 4119 .vb 4120 A B C 4121 D E F 4122 G H I 4123 .ve 4124 4125 Where the submatrices A,B,C are owned by proc0, D,E,F are 4126 owned by proc1, G,H,I are owned by proc2. 4127 4128 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4129 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4130 The 'M','N' parameters are 8,8, and have the same values on all procs. 4131 4132 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4133 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4134 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4135 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4136 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4137 matrix, ans [DF] as another SeqAIJ matrix. 4138 4139 When d_nz, o_nz parameters are specified, d_nz storage elements are 4140 allocated for every row of the local diagonal submatrix, and o_nz 4141 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4142 One way to choose d_nz and o_nz is to use the max nonzerors per local 4143 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4144 In this case, the values of d_nz,o_nz are: 4145 .vb 4146 proc0 : dnz = 2, o_nz = 2 4147 proc1 : dnz = 3, o_nz = 2 4148 proc2 : dnz = 1, o_nz = 4 4149 .ve 4150 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4151 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4152 for proc3. i.e we are using 12+15+10=37 storage locations to store 4153 34 values. 4154 4155 When d_nnz, o_nnz parameters are specified, the storage is specified 4156 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4157 In the above case the values for d_nnz,o_nnz are: 4158 .vb 4159 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4160 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4161 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4162 .ve 4163 Here the space allocated is sum of all the above values i.e 34, and 4164 hence pre-allocation is perfect. 4165 4166 Level: intermediate 4167 4168 .keywords: matrix, aij, compressed row, sparse, parallel 4169 4170 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4171 MPIAIJ, MatCreateMPIAIJWithArrays() 4172 @*/ 4173 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4174 { 4175 PetscErrorCode ierr; 4176 PetscMPIInt size; 4177 4178 PetscFunctionBegin; 4179 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4180 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4182 if (size > 1) { 4183 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4184 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4185 } else { 4186 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4187 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4188 } 4189 PetscFunctionReturn(0); 4190 } 4191 4192 #undef __FUNCT__ 4193 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4194 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4195 { 4196 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4197 4198 PetscFunctionBegin; 4199 *Ad = a->A; 4200 *Ao = a->B; 4201 *colmap = a->garray; 4202 PetscFunctionReturn(0); 4203 } 4204 4205 #undef __FUNCT__ 4206 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4207 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4208 { 4209 PetscErrorCode ierr; 4210 PetscInt i; 4211 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4212 4213 PetscFunctionBegin; 4214 if (coloring->ctype == IS_COLORING_GLOBAL) { 4215 ISColoringValue *allcolors,*colors; 4216 ISColoring ocoloring; 4217 4218 /* set coloring for diagonal portion */ 4219 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4220 4221 /* set coloring for off-diagonal portion */ 4222 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4223 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4224 for (i=0; i<a->B->cmap->n; i++) { 4225 colors[i] = allcolors[a->garray[i]]; 4226 } 4227 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4228 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4229 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4230 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4231 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4232 ISColoringValue *colors; 4233 PetscInt *larray; 4234 ISColoring ocoloring; 4235 4236 /* set coloring for diagonal portion */ 4237 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4238 for (i=0; i<a->A->cmap->n; i++) { 4239 larray[i] = i + A->cmap->rstart; 4240 } 4241 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4242 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4243 for (i=0; i<a->A->cmap->n; i++) { 4244 colors[i] = coloring->colors[larray[i]]; 4245 } 4246 ierr = PetscFree(larray);CHKERRQ(ierr); 4247 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4248 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4249 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4250 4251 /* set coloring for off-diagonal portion */ 4252 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4253 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4254 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4255 for (i=0; i<a->B->cmap->n; i++) { 4256 colors[i] = coloring->colors[larray[i]]; 4257 } 4258 ierr = PetscFree(larray);CHKERRQ(ierr); 4259 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4260 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4261 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4262 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4263 4264 PetscFunctionReturn(0); 4265 } 4266 4267 #if defined(PETSC_HAVE_ADIC) 4268 #undef __FUNCT__ 4269 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4270 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4271 { 4272 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4273 PetscErrorCode ierr; 4274 4275 PetscFunctionBegin; 4276 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4277 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4278 PetscFunctionReturn(0); 4279 } 4280 #endif 4281 4282 #undef __FUNCT__ 4283 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4284 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4285 { 4286 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4287 PetscErrorCode ierr; 4288 4289 PetscFunctionBegin; 4290 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4291 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4292 PetscFunctionReturn(0); 4293 } 4294 4295 #undef __FUNCT__ 4296 #define __FUNCT__ "MatMerge" 4297 /*@ 4298 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4299 matrices from each processor 4300 4301 Collective on MPI_Comm 4302 4303 Input Parameters: 4304 + comm - the communicators the parallel matrix will live on 4305 . inmat - the input sequential matrices 4306 . n - number of local columns (or PETSC_DECIDE) 4307 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4308 4309 Output Parameter: 4310 . outmat - the parallel matrix generated 4311 4312 Level: advanced 4313 4314 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4315 4316 @*/ 4317 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4318 { 4319 PetscErrorCode ierr; 4320 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4321 PetscInt *indx; 4322 PetscScalar *values; 4323 4324 PetscFunctionBegin; 4325 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4326 if (scall == MAT_INITIAL_MATRIX){ 4327 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4328 if (n == PETSC_DECIDE){ 4329 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4330 } 4331 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4332 rstart -= m; 4333 4334 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4335 for (i=0;i<m;i++) { 4336 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4337 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4338 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4339 } 4340 /* This routine will ONLY return MPIAIJ type matrix */ 4341 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4342 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4343 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4344 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4345 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4346 4347 } else if (scall == MAT_REUSE_MATRIX){ 4348 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4349 } else { 4350 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4351 } 4352 4353 for (i=0;i<m;i++) { 4354 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4355 Ii = i + rstart; 4356 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4357 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4358 } 4359 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4360 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4361 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4362 4363 PetscFunctionReturn(0); 4364 } 4365 4366 #undef __FUNCT__ 4367 #define __FUNCT__ "MatFileSplit" 4368 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4369 { 4370 PetscErrorCode ierr; 4371 PetscMPIInt rank; 4372 PetscInt m,N,i,rstart,nnz; 4373 size_t len; 4374 const PetscInt *indx; 4375 PetscViewer out; 4376 char *name; 4377 Mat B; 4378 const PetscScalar *values; 4379 4380 PetscFunctionBegin; 4381 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4382 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4383 /* Should this be the type of the diagonal block of A? */ 4384 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4385 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4386 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4387 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4388 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4389 for (i=0;i<m;i++) { 4390 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4391 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4392 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4393 } 4394 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4395 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4396 4397 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4398 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4399 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4400 sprintf(name,"%s.%d",outfile,rank); 4401 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4402 ierr = PetscFree(name); 4403 ierr = MatView(B,out);CHKERRQ(ierr); 4404 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4405 ierr = MatDestroy(&B);CHKERRQ(ierr); 4406 PetscFunctionReturn(0); 4407 } 4408 4409 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4410 #undef __FUNCT__ 4411 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4412 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4413 { 4414 PetscErrorCode ierr; 4415 Mat_Merge_SeqsToMPI *merge; 4416 PetscContainer container; 4417 4418 PetscFunctionBegin; 4419 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4420 if (container) { 4421 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4423 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4425 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4426 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4427 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4428 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4429 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4430 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4431 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4432 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4433 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4434 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4435 ierr = PetscFree(merge);CHKERRQ(ierr); 4436 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4437 } 4438 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4439 PetscFunctionReturn(0); 4440 } 4441 4442 #include <../src/mat/utils/freespace.h> 4443 #include <petscbt.h> 4444 4445 #undef __FUNCT__ 4446 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4447 /*@C 4448 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4449 matrices from each processor 4450 4451 Collective on MPI_Comm 4452 4453 Input Parameters: 4454 + comm - the communicators the parallel matrix will live on 4455 . seqmat - the input sequential matrices 4456 . m - number of local rows (or PETSC_DECIDE) 4457 . n - number of local columns (or PETSC_DECIDE) 4458 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4459 4460 Output Parameter: 4461 . mpimat - the parallel matrix generated 4462 4463 Level: advanced 4464 4465 Notes: 4466 The dimensions of the sequential matrix in each processor MUST be the same. 4467 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4468 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4469 @*/ 4470 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4471 { 4472 PetscErrorCode ierr; 4473 MPI_Comm comm=((PetscObject)mpimat)->comm; 4474 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4475 PetscMPIInt size,rank,taga,*len_s; 4476 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4477 PetscInt proc,m; 4478 PetscInt **buf_ri,**buf_rj; 4479 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4480 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4481 MPI_Request *s_waits,*r_waits; 4482 MPI_Status *status; 4483 MatScalar *aa=a->a; 4484 MatScalar **abuf_r,*ba_i; 4485 Mat_Merge_SeqsToMPI *merge; 4486 PetscContainer container; 4487 4488 PetscFunctionBegin; 4489 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4490 4491 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4492 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4493 4494 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4495 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4496 4497 bi = merge->bi; 4498 bj = merge->bj; 4499 buf_ri = merge->buf_ri; 4500 buf_rj = merge->buf_rj; 4501 4502 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4503 owners = merge->rowmap->range; 4504 len_s = merge->len_s; 4505 4506 /* send and recv matrix values */ 4507 /*-----------------------------*/ 4508 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4509 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4510 4511 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4512 for (proc=0,k=0; proc<size; proc++){ 4513 if (!len_s[proc]) continue; 4514 i = owners[proc]; 4515 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4516 k++; 4517 } 4518 4519 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4520 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4521 ierr = PetscFree(status);CHKERRQ(ierr); 4522 4523 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4524 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4525 4526 /* insert mat values of mpimat */ 4527 /*----------------------------*/ 4528 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4529 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4530 4531 for (k=0; k<merge->nrecv; k++){ 4532 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4533 nrows = *(buf_ri_k[k]); 4534 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4535 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4536 } 4537 4538 /* set values of ba */ 4539 m = merge->rowmap->n; 4540 for (i=0; i<m; i++) { 4541 arow = owners[rank] + i; 4542 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4543 bnzi = bi[i+1] - bi[i]; 4544 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4545 4546 /* add local non-zero vals of this proc's seqmat into ba */ 4547 anzi = ai[arow+1] - ai[arow]; 4548 aj = a->j + ai[arow]; 4549 aa = a->a + ai[arow]; 4550 nextaj = 0; 4551 for (j=0; nextaj<anzi; j++){ 4552 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4553 ba_i[j] += aa[nextaj++]; 4554 } 4555 } 4556 4557 /* add received vals into ba */ 4558 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4559 /* i-th row */ 4560 if (i == *nextrow[k]) { 4561 anzi = *(nextai[k]+1) - *nextai[k]; 4562 aj = buf_rj[k] + *(nextai[k]); 4563 aa = abuf_r[k] + *(nextai[k]); 4564 nextaj = 0; 4565 for (j=0; nextaj<anzi; j++){ 4566 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4567 ba_i[j] += aa[nextaj++]; 4568 } 4569 } 4570 nextrow[k]++; nextai[k]++; 4571 } 4572 } 4573 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4574 } 4575 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4576 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4577 4578 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4579 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4580 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4581 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4582 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4583 PetscFunctionReturn(0); 4584 } 4585 4586 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4587 4588 #undef __FUNCT__ 4589 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4590 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4591 { 4592 PetscErrorCode ierr; 4593 Mat B_mpi; 4594 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4595 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4596 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4597 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4598 PetscInt len,proc,*dnz,*onz; 4599 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4600 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4601 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4602 MPI_Status *status; 4603 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4604 PetscBT lnkbt; 4605 Mat_Merge_SeqsToMPI *merge; 4606 PetscContainer container; 4607 4608 PetscFunctionBegin; 4609 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4610 4611 /* make sure it is a PETSc comm */ 4612 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4613 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4614 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4615 4616 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4617 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4618 4619 /* determine row ownership */ 4620 /*---------------------------------------------------------*/ 4621 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4622 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4623 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4624 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4625 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4626 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4627 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4628 4629 m = merge->rowmap->n; 4630 M = merge->rowmap->N; 4631 owners = merge->rowmap->range; 4632 4633 /* determine the number of messages to send, their lengths */ 4634 /*---------------------------------------------------------*/ 4635 len_s = merge->len_s; 4636 4637 len = 0; /* length of buf_si[] */ 4638 merge->nsend = 0; 4639 for (proc=0; proc<size; proc++){ 4640 len_si[proc] = 0; 4641 if (proc == rank){ 4642 len_s[proc] = 0; 4643 } else { 4644 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4645 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4646 } 4647 if (len_s[proc]) { 4648 merge->nsend++; 4649 nrows = 0; 4650 for (i=owners[proc]; i<owners[proc+1]; i++){ 4651 if (ai[i+1] > ai[i]) nrows++; 4652 } 4653 len_si[proc] = 2*(nrows+1); 4654 len += len_si[proc]; 4655 } 4656 } 4657 4658 /* determine the number and length of messages to receive for ij-structure */ 4659 /*-------------------------------------------------------------------------*/ 4660 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4661 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4662 4663 /* post the Irecv of j-structure */ 4664 /*-------------------------------*/ 4665 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4666 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4667 4668 /* post the Isend of j-structure */ 4669 /*--------------------------------*/ 4670 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4671 4672 for (proc=0, k=0; proc<size; proc++){ 4673 if (!len_s[proc]) continue; 4674 i = owners[proc]; 4675 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4676 k++; 4677 } 4678 4679 /* receives and sends of j-structure are complete */ 4680 /*------------------------------------------------*/ 4681 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4682 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4683 4684 /* send and recv i-structure */ 4685 /*---------------------------*/ 4686 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4687 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4688 4689 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4690 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4691 for (proc=0,k=0; proc<size; proc++){ 4692 if (!len_s[proc]) continue; 4693 /* form outgoing message for i-structure: 4694 buf_si[0]: nrows to be sent 4695 [1:nrows]: row index (global) 4696 [nrows+1:2*nrows+1]: i-structure index 4697 */ 4698 /*-------------------------------------------*/ 4699 nrows = len_si[proc]/2 - 1; 4700 buf_si_i = buf_si + nrows+1; 4701 buf_si[0] = nrows; 4702 buf_si_i[0] = 0; 4703 nrows = 0; 4704 for (i=owners[proc]; i<owners[proc+1]; i++){ 4705 anzi = ai[i+1] - ai[i]; 4706 if (anzi) { 4707 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4708 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4709 nrows++; 4710 } 4711 } 4712 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4713 k++; 4714 buf_si += len_si[proc]; 4715 } 4716 4717 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4718 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4719 4720 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4721 for (i=0; i<merge->nrecv; i++){ 4722 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4723 } 4724 4725 ierr = PetscFree(len_si);CHKERRQ(ierr); 4726 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4727 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4728 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4729 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4730 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4731 ierr = PetscFree(status);CHKERRQ(ierr); 4732 4733 /* compute a local seq matrix in each processor */ 4734 /*----------------------------------------------*/ 4735 /* allocate bi array and free space for accumulating nonzero column info */ 4736 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4737 bi[0] = 0; 4738 4739 /* create and initialize a linked list */ 4740 nlnk = N+1; 4741 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4742 4743 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4744 len = 0; 4745 len = ai[owners[rank+1]] - ai[owners[rank]]; 4746 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4747 current_space = free_space; 4748 4749 /* determine symbolic info for each local row */ 4750 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4751 4752 for (k=0; k<merge->nrecv; k++){ 4753 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4754 nrows = *buf_ri_k[k]; 4755 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4756 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4757 } 4758 4759 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4760 len = 0; 4761 for (i=0;i<m;i++) { 4762 bnzi = 0; 4763 /* add local non-zero cols of this proc's seqmat into lnk */ 4764 arow = owners[rank] + i; 4765 anzi = ai[arow+1] - ai[arow]; 4766 aj = a->j + ai[arow]; 4767 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4768 bnzi += nlnk; 4769 /* add received col data into lnk */ 4770 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4771 if (i == *nextrow[k]) { /* i-th row */ 4772 anzi = *(nextai[k]+1) - *nextai[k]; 4773 aj = buf_rj[k] + *nextai[k]; 4774 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4775 bnzi += nlnk; 4776 nextrow[k]++; nextai[k]++; 4777 } 4778 } 4779 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4780 4781 /* if free space is not available, make more free space */ 4782 if (current_space->local_remaining<bnzi) { 4783 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4784 nspacedouble++; 4785 } 4786 /* copy data into free space, then initialize lnk */ 4787 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4788 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4789 4790 current_space->array += bnzi; 4791 current_space->local_used += bnzi; 4792 current_space->local_remaining -= bnzi; 4793 4794 bi[i+1] = bi[i] + bnzi; 4795 } 4796 4797 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4798 4799 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4800 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4801 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4802 4803 /* create symbolic parallel matrix B_mpi */ 4804 /*---------------------------------------*/ 4805 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4806 if (n==PETSC_DECIDE) { 4807 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4808 } else { 4809 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4810 } 4811 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4812 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4813 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4814 4815 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4816 B_mpi->assembled = PETSC_FALSE; 4817 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4818 merge->bi = bi; 4819 merge->bj = bj; 4820 merge->buf_ri = buf_ri; 4821 merge->buf_rj = buf_rj; 4822 merge->coi = PETSC_NULL; 4823 merge->coj = PETSC_NULL; 4824 merge->owners_co = PETSC_NULL; 4825 4826 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4827 4828 /* attach the supporting struct to B_mpi for reuse */ 4829 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4830 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4831 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4832 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4833 *mpimat = B_mpi; 4834 4835 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4836 PetscFunctionReturn(0); 4837 } 4838 4839 #undef __FUNCT__ 4840 #define __FUNCT__ "MatMerge_SeqsToMPI" 4841 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4842 { 4843 PetscErrorCode ierr; 4844 4845 PetscFunctionBegin; 4846 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4847 if (scall == MAT_INITIAL_MATRIX){ 4848 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4849 } 4850 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4851 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4852 PetscFunctionReturn(0); 4853 } 4854 4855 #undef __FUNCT__ 4856 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4857 /*@ 4858 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4859 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4860 with MatGetSize() 4861 4862 Not Collective 4863 4864 Input Parameters: 4865 + A - the matrix 4866 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4867 4868 Output Parameter: 4869 . A_loc - the local sequential matrix generated 4870 4871 Level: developer 4872 4873 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4874 4875 @*/ 4876 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4877 { 4878 PetscErrorCode ierr; 4879 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4880 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4881 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4882 MatScalar *aa=a->a,*ba=b->a,*cam; 4883 PetscScalar *ca; 4884 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4885 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4886 PetscBool match; 4887 4888 PetscFunctionBegin; 4889 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4890 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4891 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4892 if (scall == MAT_INITIAL_MATRIX){ 4893 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4894 ci[0] = 0; 4895 for (i=0; i<am; i++){ 4896 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4897 } 4898 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4899 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4900 k = 0; 4901 for (i=0; i<am; i++) { 4902 ncols_o = bi[i+1] - bi[i]; 4903 ncols_d = ai[i+1] - ai[i]; 4904 /* off-diagonal portion of A */ 4905 for (jo=0; jo<ncols_o; jo++) { 4906 col = cmap[*bj]; 4907 if (col >= cstart) break; 4908 cj[k] = col; bj++; 4909 ca[k++] = *ba++; 4910 } 4911 /* diagonal portion of A */ 4912 for (j=0; j<ncols_d; j++) { 4913 cj[k] = cstart + *aj++; 4914 ca[k++] = *aa++; 4915 } 4916 /* off-diagonal portion of A */ 4917 for (j=jo; j<ncols_o; j++) { 4918 cj[k] = cmap[*bj++]; 4919 ca[k++] = *ba++; 4920 } 4921 } 4922 /* put together the new matrix */ 4923 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4924 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4925 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4926 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4927 mat->free_a = PETSC_TRUE; 4928 mat->free_ij = PETSC_TRUE; 4929 mat->nonew = 0; 4930 } else if (scall == MAT_REUSE_MATRIX){ 4931 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4932 ci = mat->i; cj = mat->j; cam = mat->a; 4933 for (i=0; i<am; i++) { 4934 /* off-diagonal portion of A */ 4935 ncols_o = bi[i+1] - bi[i]; 4936 for (jo=0; jo<ncols_o; jo++) { 4937 col = cmap[*bj]; 4938 if (col >= cstart) break; 4939 *cam++ = *ba++; bj++; 4940 } 4941 /* diagonal portion of A */ 4942 ncols_d = ai[i+1] - ai[i]; 4943 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4944 /* off-diagonal portion of A */ 4945 for (j=jo; j<ncols_o; j++) { 4946 *cam++ = *ba++; bj++; 4947 } 4948 } 4949 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4950 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4951 PetscFunctionReturn(0); 4952 } 4953 4954 #undef __FUNCT__ 4955 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4956 /*@C 4957 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4958 4959 Not Collective 4960 4961 Input Parameters: 4962 + A - the matrix 4963 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4964 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4965 4966 Output Parameter: 4967 . A_loc - the local sequential matrix generated 4968 4969 Level: developer 4970 4971 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4972 4973 @*/ 4974 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4975 { 4976 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4977 PetscErrorCode ierr; 4978 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4979 IS isrowa,iscola; 4980 Mat *aloc; 4981 PetscBool match; 4982 4983 PetscFunctionBegin; 4984 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4985 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4986 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4987 if (!row){ 4988 start = A->rmap->rstart; end = A->rmap->rend; 4989 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4990 } else { 4991 isrowa = *row; 4992 } 4993 if (!col){ 4994 start = A->cmap->rstart; 4995 cmap = a->garray; 4996 nzA = a->A->cmap->n; 4997 nzB = a->B->cmap->n; 4998 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4999 ncols = 0; 5000 for (i=0; i<nzB; i++) { 5001 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5002 else break; 5003 } 5004 imark = i; 5005 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5006 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5007 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5008 } else { 5009 iscola = *col; 5010 } 5011 if (scall != MAT_INITIAL_MATRIX){ 5012 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5013 aloc[0] = *A_loc; 5014 } 5015 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5016 *A_loc = aloc[0]; 5017 ierr = PetscFree(aloc);CHKERRQ(ierr); 5018 if (!row){ 5019 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5020 } 5021 if (!col){ 5022 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5023 } 5024 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5025 PetscFunctionReturn(0); 5026 } 5027 5028 #undef __FUNCT__ 5029 #define __FUNCT__ "MatGetBrowsOfAcols" 5030 /*@C 5031 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5032 5033 Collective on Mat 5034 5035 Input Parameters: 5036 + A,B - the matrices in mpiaij format 5037 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5038 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5039 5040 Output Parameter: 5041 + rowb, colb - index sets of rows and columns of B to extract 5042 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 5043 - B_seq - the sequential matrix generated 5044 5045 Level: developer 5046 5047 @*/ 5048 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 5049 { 5050 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5051 PetscErrorCode ierr; 5052 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5053 IS isrowb,iscolb; 5054 Mat *bseq; 5055 5056 PetscFunctionBegin; 5057 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5058 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5059 } 5060 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5061 5062 if (scall == MAT_INITIAL_MATRIX){ 5063 start = A->cmap->rstart; 5064 cmap = a->garray; 5065 nzA = a->A->cmap->n; 5066 nzB = a->B->cmap->n; 5067 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5068 ncols = 0; 5069 for (i=0; i<nzB; i++) { /* row < local row index */ 5070 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5071 else break; 5072 } 5073 imark = i; 5074 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5075 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5076 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5077 *brstart = imark; 5078 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5079 } else { 5080 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5081 isrowb = *rowb; iscolb = *colb; 5082 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5083 bseq[0] = *B_seq; 5084 } 5085 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5086 *B_seq = bseq[0]; 5087 ierr = PetscFree(bseq);CHKERRQ(ierr); 5088 if (!rowb){ 5089 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5090 } else { 5091 *rowb = isrowb; 5092 } 5093 if (!colb){ 5094 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5095 } else { 5096 *colb = iscolb; 5097 } 5098 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5099 PetscFunctionReturn(0); 5100 } 5101 5102 #undef __FUNCT__ 5103 #define __FUNCT__ "MatGetBrowsOfAoCols" 5104 /*@C 5105 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5106 of the OFF-DIAGONAL portion of local A 5107 5108 Collective on Mat 5109 5110 Input Parameters: 5111 + A,B - the matrices in mpiaij format 5112 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5113 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5114 . startsj_r - similar to startsj for receives 5115 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5116 5117 Output Parameter: 5118 + B_oth - the sequential matrix generated 5119 5120 Level: developer 5121 5122 @*/ 5123 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5124 { 5125 VecScatter_MPI_General *gen_to,*gen_from; 5126 PetscErrorCode ierr; 5127 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5128 Mat_SeqAIJ *b_oth; 5129 VecScatter ctx=a->Mvctx; 5130 MPI_Comm comm=((PetscObject)ctx)->comm; 5131 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5132 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5133 PetscScalar *rvalues,*svalues; 5134 MatScalar *b_otha,*bufa,*bufA; 5135 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5136 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5137 MPI_Status *sstatus,rstatus; 5138 PetscMPIInt jj; 5139 PetscInt *cols,sbs,rbs; 5140 PetscScalar *vals; 5141 5142 PetscFunctionBegin; 5143 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5144 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5145 } 5146 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5147 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5148 5149 gen_to = (VecScatter_MPI_General*)ctx->todata; 5150 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5151 rvalues = gen_from->values; /* holds the length of receiving row */ 5152 svalues = gen_to->values; /* holds the length of sending row */ 5153 nrecvs = gen_from->n; 5154 nsends = gen_to->n; 5155 5156 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5157 srow = gen_to->indices; /* local row index to be sent */ 5158 sstarts = gen_to->starts; 5159 sprocs = gen_to->procs; 5160 sstatus = gen_to->sstatus; 5161 sbs = gen_to->bs; 5162 rstarts = gen_from->starts; 5163 rprocs = gen_from->procs; 5164 rbs = gen_from->bs; 5165 5166 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5167 if (scall == MAT_INITIAL_MATRIX){ 5168 /* i-array */ 5169 /*---------*/ 5170 /* post receives */ 5171 for (i=0; i<nrecvs; i++){ 5172 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5173 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5174 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5175 } 5176 5177 /* pack the outgoing message */ 5178 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5179 sstartsj[0] = 0; rstartsj[0] = 0; 5180 len = 0; /* total length of j or a array to be sent */ 5181 k = 0; 5182 for (i=0; i<nsends; i++){ 5183 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5184 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5185 for (j=0; j<nrows; j++) { 5186 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5187 for (l=0; l<sbs; l++){ 5188 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5189 rowlen[j*sbs+l] = ncols; 5190 len += ncols; 5191 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5192 } 5193 k++; 5194 } 5195 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5196 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5197 } 5198 /* recvs and sends of i-array are completed */ 5199 i = nrecvs; 5200 while (i--) { 5201 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5202 } 5203 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5204 5205 /* allocate buffers for sending j and a arrays */ 5206 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5207 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5208 5209 /* create i-array of B_oth */ 5210 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5211 b_othi[0] = 0; 5212 len = 0; /* total length of j or a array to be received */ 5213 k = 0; 5214 for (i=0; i<nrecvs; i++){ 5215 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5216 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5217 for (j=0; j<nrows; j++) { 5218 b_othi[k+1] = b_othi[k] + rowlen[j]; 5219 len += rowlen[j]; k++; 5220 } 5221 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5222 } 5223 5224 /* allocate space for j and a arrrays of B_oth */ 5225 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5226 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5227 5228 /* j-array */ 5229 /*---------*/ 5230 /* post receives of j-array */ 5231 for (i=0; i<nrecvs; i++){ 5232 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5233 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5234 } 5235 5236 /* pack the outgoing message j-array */ 5237 k = 0; 5238 for (i=0; i<nsends; i++){ 5239 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5240 bufJ = bufj+sstartsj[i]; 5241 for (j=0; j<nrows; j++) { 5242 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5243 for (ll=0; ll<sbs; ll++){ 5244 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5245 for (l=0; l<ncols; l++){ 5246 *bufJ++ = cols[l]; 5247 } 5248 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5249 } 5250 } 5251 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5252 } 5253 5254 /* recvs and sends of j-array are completed */ 5255 i = nrecvs; 5256 while (i--) { 5257 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5258 } 5259 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5260 } else if (scall == MAT_REUSE_MATRIX){ 5261 sstartsj = *startsj; 5262 rstartsj = *startsj_r; 5263 bufa = *bufa_ptr; 5264 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5265 b_otha = b_oth->a; 5266 } else { 5267 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5268 } 5269 5270 /* a-array */ 5271 /*---------*/ 5272 /* post receives of a-array */ 5273 for (i=0; i<nrecvs; i++){ 5274 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5275 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5276 } 5277 5278 /* pack the outgoing message a-array */ 5279 k = 0; 5280 for (i=0; i<nsends; i++){ 5281 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5282 bufA = bufa+sstartsj[i]; 5283 for (j=0; j<nrows; j++) { 5284 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5285 for (ll=0; ll<sbs; ll++){ 5286 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5287 for (l=0; l<ncols; l++){ 5288 *bufA++ = vals[l]; 5289 } 5290 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5291 } 5292 } 5293 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5294 } 5295 /* recvs and sends of a-array are completed */ 5296 i = nrecvs; 5297 while (i--) { 5298 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5299 } 5300 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5301 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5302 5303 if (scall == MAT_INITIAL_MATRIX){ 5304 /* put together the new matrix */ 5305 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5306 5307 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5308 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5309 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5310 b_oth->free_a = PETSC_TRUE; 5311 b_oth->free_ij = PETSC_TRUE; 5312 b_oth->nonew = 0; 5313 5314 ierr = PetscFree(bufj);CHKERRQ(ierr); 5315 if (!startsj || !bufa_ptr){ 5316 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5317 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5318 } else { 5319 *startsj = sstartsj; 5320 *startsj_r = rstartsj; 5321 *bufa_ptr = bufa; 5322 } 5323 } 5324 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5325 PetscFunctionReturn(0); 5326 } 5327 5328 #undef __FUNCT__ 5329 #define __FUNCT__ "MatGetCommunicationStructs" 5330 /*@C 5331 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5332 5333 Not Collective 5334 5335 Input Parameters: 5336 . A - The matrix in mpiaij format 5337 5338 Output Parameter: 5339 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5340 . colmap - A map from global column index to local index into lvec 5341 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5342 5343 Level: developer 5344 5345 @*/ 5346 #if defined (PETSC_USE_CTABLE) 5347 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5348 #else 5349 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5350 #endif 5351 { 5352 Mat_MPIAIJ *a; 5353 5354 PetscFunctionBegin; 5355 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5356 PetscValidPointer(lvec, 2); 5357 PetscValidPointer(colmap, 3); 5358 PetscValidPointer(multScatter, 4); 5359 a = (Mat_MPIAIJ *) A->data; 5360 if (lvec) *lvec = a->lvec; 5361 if (colmap) *colmap = a->colmap; 5362 if (multScatter) *multScatter = a->Mvctx; 5363 PetscFunctionReturn(0); 5364 } 5365 5366 EXTERN_C_BEGIN 5367 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5368 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5369 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5370 EXTERN_C_END 5371 5372 #undef __FUNCT__ 5373 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5374 /* 5375 Computes (B'*A')' since computing B*A directly is untenable 5376 5377 n p p 5378 ( ) ( ) ( ) 5379 m ( A ) * n ( B ) = m ( C ) 5380 ( ) ( ) ( ) 5381 5382 */ 5383 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5384 { 5385 PetscErrorCode ierr; 5386 Mat At,Bt,Ct; 5387 5388 PetscFunctionBegin; 5389 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5390 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5391 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5392 ierr = MatDestroy(&At);CHKERRQ(ierr); 5393 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5394 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5395 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5396 PetscFunctionReturn(0); 5397 } 5398 5399 #undef __FUNCT__ 5400 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5401 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5402 { 5403 PetscErrorCode ierr; 5404 PetscInt m=A->rmap->n,n=B->cmap->n; 5405 Mat Cmat; 5406 5407 PetscFunctionBegin; 5408 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5409 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5410 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5411 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5412 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5413 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5414 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5415 *C = Cmat; 5416 PetscFunctionReturn(0); 5417 } 5418 5419 /* ----------------------------------------------------------------*/ 5420 #undef __FUNCT__ 5421 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5422 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5423 { 5424 PetscErrorCode ierr; 5425 5426 PetscFunctionBegin; 5427 if (scall == MAT_INITIAL_MATRIX){ 5428 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5429 } 5430 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5431 PetscFunctionReturn(0); 5432 } 5433 5434 EXTERN_C_BEGIN 5435 #if defined(PETSC_HAVE_MUMPS) 5436 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5437 #endif 5438 #if defined(PETSC_HAVE_PASTIX) 5439 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5440 #endif 5441 #if defined(PETSC_HAVE_SUPERLU_DIST) 5442 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5443 #endif 5444 #if defined(PETSC_HAVE_SPOOLES) 5445 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5446 #endif 5447 EXTERN_C_END 5448 5449 /*MC 5450 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5451 5452 Options Database Keys: 5453 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5454 5455 Level: beginner 5456 5457 .seealso: MatCreateMPIAIJ() 5458 M*/ 5459 5460 EXTERN_C_BEGIN 5461 #undef __FUNCT__ 5462 #define __FUNCT__ "MatCreate_MPIAIJ" 5463 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5464 { 5465 Mat_MPIAIJ *b; 5466 PetscErrorCode ierr; 5467 PetscMPIInt size; 5468 5469 PetscFunctionBegin; 5470 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5471 5472 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5473 B->data = (void*)b; 5474 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5475 B->rmap->bs = 1; 5476 B->assembled = PETSC_FALSE; 5477 5478 B->insertmode = NOT_SET_VALUES; 5479 b->size = size; 5480 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5481 5482 /* build cache for off array entries formed */ 5483 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5484 b->donotstash = PETSC_FALSE; 5485 b->colmap = 0; 5486 b->garray = 0; 5487 b->roworiented = PETSC_TRUE; 5488 5489 /* stuff used for matrix vector multiply */ 5490 b->lvec = PETSC_NULL; 5491 b->Mvctx = PETSC_NULL; 5492 5493 /* stuff for MatGetRow() */ 5494 b->rowindices = 0; 5495 b->rowvalues = 0; 5496 b->getrowactive = PETSC_FALSE; 5497 5498 #if defined(PETSC_HAVE_SPOOLES) 5499 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5500 "MatGetFactor_mpiaij_spooles", 5501 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5502 #endif 5503 #if defined(PETSC_HAVE_MUMPS) 5504 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5505 "MatGetFactor_aij_mumps", 5506 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5507 #endif 5508 #if defined(PETSC_HAVE_PASTIX) 5509 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5510 "MatGetFactor_mpiaij_pastix", 5511 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5512 #endif 5513 #if defined(PETSC_HAVE_SUPERLU_DIST) 5514 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5515 "MatGetFactor_mpiaij_superlu_dist", 5516 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5517 #endif 5518 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5519 "MatStoreValues_MPIAIJ", 5520 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5521 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5522 "MatRetrieveValues_MPIAIJ", 5523 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5524 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5525 "MatGetDiagonalBlock_MPIAIJ", 5526 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5527 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5528 "MatIsTranspose_MPIAIJ", 5529 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5530 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5531 "MatMPIAIJSetPreallocation_MPIAIJ", 5532 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5533 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5534 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5535 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5536 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5537 "MatDiagonalScaleLocal_MPIAIJ", 5538 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5539 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5540 "MatConvert_MPIAIJ_MPIAIJPERM", 5541 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5542 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5543 "MatConvert_MPIAIJ_MPIAIJCRL", 5544 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5545 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5546 "MatConvert_MPIAIJ_MPISBAIJ", 5547 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5548 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5549 "MatMatMult_MPIDense_MPIAIJ", 5550 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5551 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5552 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5553 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5554 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5555 "MatMatMultNumeric_MPIDense_MPIAIJ", 5556 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5557 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5558 PetscFunctionReturn(0); 5559 } 5560 EXTERN_C_END 5561 5562 #undef __FUNCT__ 5563 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5564 /*@ 5565 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5566 and "off-diagonal" part of the matrix in CSR format. 5567 5568 Collective on MPI_Comm 5569 5570 Input Parameters: 5571 + comm - MPI communicator 5572 . m - number of local rows (Cannot be PETSC_DECIDE) 5573 . n - This value should be the same as the local size used in creating the 5574 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5575 calculated if N is given) For square matrices n is almost always m. 5576 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5577 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5578 . i - row indices for "diagonal" portion of matrix 5579 . j - column indices 5580 . a - matrix values 5581 . oi - row indices for "off-diagonal" portion of matrix 5582 . oj - column indices 5583 - oa - matrix values 5584 5585 Output Parameter: 5586 . mat - the matrix 5587 5588 Level: advanced 5589 5590 Notes: 5591 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5592 must free the arrays once the matrix has been destroyed and not before. 5593 5594 The i and j indices are 0 based 5595 5596 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5597 5598 This sets local rows and cannot be used to set off-processor values. 5599 5600 You cannot later use MatSetValues() to change values in this matrix. 5601 5602 .keywords: matrix, aij, compressed row, sparse, parallel 5603 5604 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5605 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5606 @*/ 5607 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5608 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5609 { 5610 PetscErrorCode ierr; 5611 Mat_MPIAIJ *maij; 5612 5613 PetscFunctionBegin; 5614 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5615 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5616 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5617 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5618 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5619 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5620 maij = (Mat_MPIAIJ*) (*mat)->data; 5621 maij->donotstash = PETSC_TRUE; 5622 (*mat)->preallocated = PETSC_TRUE; 5623 5624 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5625 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5626 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5627 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5628 5629 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5630 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5631 5632 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5633 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5634 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5635 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5636 5637 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5638 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5639 PetscFunctionReturn(0); 5640 } 5641 5642 /* 5643 Special version for direct calls from Fortran 5644 */ 5645 #include <private/fortranimpl.h> 5646 5647 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5648 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5650 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5651 #endif 5652 5653 /* Change these macros so can be used in void function */ 5654 #undef CHKERRQ 5655 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5656 #undef SETERRQ2 5657 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5658 #undef SETERRQ 5659 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5660 5661 EXTERN_C_BEGIN 5662 #undef __FUNCT__ 5663 #define __FUNCT__ "matsetvaluesmpiaij_" 5664 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5665 { 5666 Mat mat = *mmat; 5667 PetscInt m = *mm, n = *mn; 5668 InsertMode addv = *maddv; 5669 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5670 PetscScalar value; 5671 PetscErrorCode ierr; 5672 5673 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5674 if (mat->insertmode == NOT_SET_VALUES) { 5675 mat->insertmode = addv; 5676 } 5677 #if defined(PETSC_USE_DEBUG) 5678 else if (mat->insertmode != addv) { 5679 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5680 } 5681 #endif 5682 { 5683 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5684 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5685 PetscBool roworiented = aij->roworiented; 5686 5687 /* Some Variables required in the macro */ 5688 Mat A = aij->A; 5689 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5690 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5691 MatScalar *aa = a->a; 5692 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5693 Mat B = aij->B; 5694 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5695 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5696 MatScalar *ba = b->a; 5697 5698 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5699 PetscInt nonew = a->nonew; 5700 MatScalar *ap1,*ap2; 5701 5702 PetscFunctionBegin; 5703 for (i=0; i<m; i++) { 5704 if (im[i] < 0) continue; 5705 #if defined(PETSC_USE_DEBUG) 5706 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5707 #endif 5708 if (im[i] >= rstart && im[i] < rend) { 5709 row = im[i] - rstart; 5710 lastcol1 = -1; 5711 rp1 = aj + ai[row]; 5712 ap1 = aa + ai[row]; 5713 rmax1 = aimax[row]; 5714 nrow1 = ailen[row]; 5715 low1 = 0; 5716 high1 = nrow1; 5717 lastcol2 = -1; 5718 rp2 = bj + bi[row]; 5719 ap2 = ba + bi[row]; 5720 rmax2 = bimax[row]; 5721 nrow2 = bilen[row]; 5722 low2 = 0; 5723 high2 = nrow2; 5724 5725 for (j=0; j<n; j++) { 5726 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5727 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5728 if (in[j] >= cstart && in[j] < cend){ 5729 col = in[j] - cstart; 5730 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5731 } else if (in[j] < 0) continue; 5732 #if defined(PETSC_USE_DEBUG) 5733 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5734 #endif 5735 else { 5736 if (mat->was_assembled) { 5737 if (!aij->colmap) { 5738 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5739 } 5740 #if defined (PETSC_USE_CTABLE) 5741 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5742 col--; 5743 #else 5744 col = aij->colmap[in[j]] - 1; 5745 #endif 5746 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5747 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5748 col = in[j]; 5749 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5750 B = aij->B; 5751 b = (Mat_SeqAIJ*)B->data; 5752 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5753 rp2 = bj + bi[row]; 5754 ap2 = ba + bi[row]; 5755 rmax2 = bimax[row]; 5756 nrow2 = bilen[row]; 5757 low2 = 0; 5758 high2 = nrow2; 5759 bm = aij->B->rmap->n; 5760 ba = b->a; 5761 } 5762 } else col = in[j]; 5763 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5764 } 5765 } 5766 } else { 5767 if (!aij->donotstash) { 5768 if (roworiented) { 5769 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5770 } else { 5771 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5772 } 5773 } 5774 } 5775 }} 5776 PetscFunctionReturnVoid(); 5777 } 5778 EXTERN_C_END 5779 5780