#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
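/*
   Example (a minimal sketch, not part of the library): creating an AIJ matrix and
   calling both preallocation routines, as recommended above, so the same code runs
   correctly on one process or many.  The per-row counts 5 (diagonal block) and
   2 (off-diagonal block) are illustrative assumptions only.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/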
#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  /* create the IS on the matrix's communicator, not PETSC_COMM_WORLD */
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
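  Example (an illustrative sketch only): gmat needs to be a valid MATSEQAIJ matrix
  on process 0 (it is not referenced on the other processes), and m is the number
  of rows this process is to own:

    Mat dist;
    ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);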
  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash-table cost; without it, it is not scalable (each process
  stores an order-N integer array), though access is fast.
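  As an illustration (a sketch only, mirroring MatSetValues_MPIAIJ() below),
  translating a global column gcol into a local off-diagonal index lcol looks like:

    #if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;   (a return of 0 means "not found", hence the +1/-1 shift)
    #else
      lcol = aij->colmap[gcol] - 1;
    #endif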
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 
PetscFunctionBegin; 432 /* code only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew = a->nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 506 } else if (in[j] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 509 #endif 510 else { 511 if (mat->was_assembled) { 512 if (!aij->colmap) { 513 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 517 col--; 518 #else 519 col = aij->colmap[in[j]] - 1; 520 #endif 521 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 col = in[j]; 524 /* Reinitialize the variables required by 
MatSetValues_SeqAIJ_B_Private() */ 525 B = aij->B; 526 b = (Mat_SeqAIJ*)B->data; 527 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 528 rp2 = bj + bi[row]; 529 ap2 = ba + bi[row]; 530 rmax2 = bimax[row]; 531 nrow2 = bilen[row]; 532 low2 = 0; 533 high2 = nrow2; 534 bm = aij->B->rmap->n; 535 ba = b->a; 536 } 537 } else col = in[j]; 538 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 539 } 540 } 541 } else { 542 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 543 if (!aij->donotstash) { 544 if (roworiented) { 545 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 546 } else { 547 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatGetValues_MPIAIJ" 557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 560 PetscErrorCode ierr; 561 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 562 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 563 564 PetscFunctionBegin; 565 for (i=0; i<m; i++) { 566 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 567 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 568 if (idxm[i] >= rstart && idxm[i] < rend) { 569 row = idxm[i] - rstart; 570 for (j=0; j<n; j++) { 571 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 572 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 573 if (idxn[j] >= cstart && idxn[j] < cend){ 574 col = idxn[j] - cstart; 575 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 576 } else { 577 if (!aij->colmap) { 578 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 579 } 580 #if defined (PETSC_USE_CTABLE) 581 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 582 col --; 583 #else 584 col = aij->colmap[idxn[j]] - 1; 585 #endif 586 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 587 else { 588 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else { 593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (aij->donotstash || mat->nooffprocentries) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) 
SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 664 /* 665 if nonzero structure of submatrix B cannot change then we know that 666 no processor disassembled thus we can skip this stuff 667 */ 668 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 669 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 670 if (mat->was_assembled && !other_disassembled) { 671 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 672 } 673 } 674 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 675 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 681 682 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 683 aij->rowvalues = 0; 684 685 /* used by MatAXPY() */ 686 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 687 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 688 689 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 690 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else 
{
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called once per diagonal entry;
     that would allocate new memory with each call, and all those mallocs
     would slow things down.
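     As an illustration (a usage sketch only, not code from this file; A, nrows,
     rows, x and b are assumed to exist), zeroing rows, placing 1.0 on the diagonal,
     and fixing the right hand side for the boundary values in x looks like

       ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);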
839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 876 { 877 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 880 PetscInt i,*owners = A->rmap->range; 881 PetscInt *nprocs,j,idx,nsends; 882 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 883 PetscInt *rvalues,count,base,slen,*source; 884 PetscInt *lens,*lrows,*values,m; 885 MPI_Comm comm = ((PetscObject)A)->comm; 886 MPI_Request *send_waits,*recv_waits; 887 MPI_Status recv_status,*send_status; 888 const PetscScalar *xx; 889 PetscScalar *bb,*mask; 890 Vec xmask,lmask; 891 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 892 const PetscInt *aj, *ii,*ridx; 893 PetscScalar *aa; 894 #if defined(PETSC_DEBUG) 895 PetscBool found = PETSC_FALSE; 896 #endif 897 898 PetscFunctionBegin; 899 /* first count number of contributors to each processor */ 900 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 901 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 902 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 903 j = 0; 904 for (i=0; i<N; i++) { 905 if (lastidx > (idx = rows[i])) j = 0; 906 lastidx = idx; 907 for (; j<size; j++) { 908 if (idx >= owners[j] && idx < owners[j+1]) { 909 nprocs[2*j]++; 910 nprocs[2*j+1] = 1; 911 owner[i] = j; 912 #if defined(PETSC_DEBUG) 913 found = PETSC_TRUE; 914 #endif 915 break; 916 } 917 } 918 #if defined(PETSC_DEBUG) 919 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 920 found = PETSC_FALSE; 921 #endif 922 } 923 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 924 925 /* inform other processors of number of messages and max length*/ 926 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 927 928 /* post receives: */ 929 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 930 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 931 for (i=0; i<nrecvs; i++) { 932 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 933 } 934 935 /* do sends: 936 1) starts[i] gives the starting index in svalues for stuff going to 937 the ith processor 938 */ 939 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 940 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 941 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 942 starts[0] = 0; 943 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 944 for (i=0; i<N; i++) { 945 svalues[starts[owner[i]]++] = rows[i]; 946 } 947 948 starts[0] = 0; 949 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 950 count = 0; 951 for (i=0; i<size; i++) { 952 if (nprocs[2*i+1]) { 953 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 954 } 955 } 956 ierr = PetscFree(starts);CHKERRQ(ierr); 957 958 base = owners[rank]; 959 960 /* wait on receives */ 961 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 962 count = nrecvs; slen = 0; 963 while (count) { 964 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 965 /* unpack receives into our local space */ 966 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 967 source[imdex] = recv_status.MPI_SOURCE; 968 lens[imdex] = n; 969 slen += n; 970 count--; 971 } 972 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 973 974 /* move the data into the send scatter */ 975 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 976 count = 0; 977 for (i=0; i<nrecvs; i++) { 978 values = rvalues + i*nmax; 979 for (j=0; j<lens[i]; j++) { 980 lrows[count++] = values[j] - base; 981 } 982 } 983 ierr = PetscFree(rvalues);CHKERRQ(ierr); 984 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 985 ierr = PetscFree(owner);CHKERRQ(ierr); 986 ierr = PetscFree(nprocs);CHKERRQ(ierr); 987 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 988 989 /* zero diagonal part of matrix */ 990 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 991 992 /* handle off diagonal part of matrix */ 993 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 994 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 995 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 996 for (i=0; i<slen; i++) { 997 bb[lrows[i]] = 1; 998 } 999 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1003 if (x) { 1004 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1007 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1008 } 1009 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1010 1011 /* remove zeroed rows of off diagonal matrix */ 1012 ii = aij->i; 1013 for (i=0; i<slen; i++) { 1014 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1015 } 1016 1017 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1018 if (aij->compressedrow.use){ 1019 m = aij->compressedrow.nrows; 1020 ii = 
aij->compressedrow.i; 1021 ridx = aij->compressedrow.rindex; 1022 for (i=0; i<m; i++){ 1023 n = ii[i+1] - ii[i]; 1024 aj = aij->j + ii[i]; 1025 aa = aij->a + ii[i]; 1026 1027 for (j=0; j<n; j++) { 1028 if (PetscAbsScalar(mask[*aj])) { 1029 if (b) bb[*ridx] -= *aa*xx[*aj]; 1030 *aa = 0.0; 1031 } 1032 aa++; 1033 aj++; 1034 } 1035 ridx++; 1036 } 1037 } else { /* do not use compressed row format */ 1038 m = l->B->rmap->n; 1039 for (i=0; i<m; i++) { 1040 n = ii[i+1] - ii[i]; 1041 aj = aij->j + ii[i]; 1042 aa = aij->a + ii[i]; 1043 for (j=0; j<n; j++) { 1044 if (PetscAbsScalar(mask[*aj])) { 1045 if (b) bb[i] -= *aa*xx[*aj]; 1046 *aa = 0.0; 1047 } 1048 aa++; 1049 aj++; 1050 } 1051 } 1052 } 1053 if (x) { 1054 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1056 } 1057 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1058 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1059 ierr = PetscFree(lrows);CHKERRQ(ierr); 1060 1061 /* wait on sends */ 1062 if (nsends) { 1063 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1064 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1065 ierr = PetscFree(send_status);CHKERRQ(ierr); 1066 } 1067 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1068 ierr = PetscFree(svalues);CHKERRQ(ierr); 1069 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatMult_MPIAIJ" 1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 PetscInt nt; 1080 1081 PetscFunctionBegin; 1082 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1083 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1084 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1094 { 1095 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 PetscBool merged; 1125 1126 PetscFunctionBegin; 1127 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1128 /* do nondiagonal part */ 1129 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1130 if (!merged) { 1131 /* 
send it on its way */ 1132 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1133 /* do local part */ 1134 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1135 /* receive remote parts: note this assumes the values are not actually */ 1136 /* added in yy until the next line, */ 1137 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 } else { 1139 /* do local part */ 1140 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1141 /* send it on its way */ 1142 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 /* values actually were received in the Begin() but we need to call this nop */ 1144 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 EXTERN_C_BEGIN 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1152 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1153 { 1154 MPI_Comm comm; 1155 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1156 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1157 IS Me,Notme; 1158 PetscErrorCode ierr; 1159 PetscInt M,N,first,last,*notme,i; 1160 PetscMPIInt size; 1161 1162 PetscFunctionBegin; 1163 1164 /* Easy test: symmetric diagonal block */ 1165 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1166 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1167 if (!*f) PetscFunctionReturn(0); 1168 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1169 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1170 if (size == 1) PetscFunctionReturn(0); 1171 1172 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1173 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1174 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1175 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1176 for (i=0; i<first; i++) notme[i] = i; 1177 for (i=last; i<M; i++) notme[i-last+first] = i; 1178 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1179 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1180 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1181 Aoff = Aoffs[0]; 1182 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1183 Boff = Boffs[0]; 1184 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1185 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1186 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1187 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1188 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1189 ierr = PetscFree(notme);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 EXTERN_C_END 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1197 { 1198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 /* do nondiagonal part */ 1203 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1204 /* send it on its way */ 1205 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1206 /* do local part */ 1207 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1208 /* receive remote parts */ 1209 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 
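/*
   The multiply kernels above all follow the same pattern: with the parallel matrix
   stored as the local diagonal block A, the local off-diagonal block B, and the
   ghost vector lvec, the forward product is

     y = A*x_local + B*x_ghost

   with the scatter that fills lvec overlapped with the local product; the transpose
   kernels run the same scatter in reverse with ADD_VALUES.  A minimal usage sketch
   through the public interface (assuming an assembled Mat Amat and conforming
   vectors x and y; not code from this file):

     ierr = MatMult(Amat,x,y);CHKERRQ(ierr);
     ierr = MatMultTranspose(Amat,x,y);CHKERRQ(ierr);
*/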
1212 1213 /* 1214 This only works correctly for square matrices where the subblock A->A is the 1215 diagonal block 1216 */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1220 { 1221 PetscErrorCode ierr; 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 1224 PetscFunctionBegin; 1225 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1226 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1227 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatScale_MPIAIJ" 1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1234 { 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1240 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatDestroy_MPIAIJ" 1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1247 { 1248 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 #if defined(PETSC_USE_LOG) 1253 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1254 #endif 1255 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1256 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1257 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1258 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1259 #if defined (PETSC_USE_CTABLE) 1260 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1261 #else 1262 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1263 #endif 1264 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1266 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1267 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1268 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1269 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1270 1271 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1286 { 1287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1288 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1289 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1290 PetscErrorCode ierr; 1291 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1292 int 
fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);
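/*
   Each of the three stages of this writer (row lengths above, then column indices,
   then numerical values below) uses the same flow-control pattern: process 0 writes
   its own data, then receives and writes each other process's data in turn, while
   the flow-control calls throttle how many messages are in flight at once.
   Schematically (a sketch, not code from this file):

     PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
     if (!rank) {  write own data; for each i: StepMaster, recv from i, write; then EndMaster  }
     else       {  StepWorker; send data to process 0; then EndWorker  }
*/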
  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr =
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1438 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1439 if (iascii) { 1440 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1441 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1442 MatInfo info; 1443 PetscBool inodes; 1444 1445 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1446 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1447 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1448 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1449 if (!inodes) { 1450 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1451 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1452 } else { 1453 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1454 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1455 } 1456 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1457 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1458 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1460 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1463 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1466 PetscInt inodecount,inodelimit,*inodes; 1467 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1468 if (inodes) { 1469 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1472 } 1473 PetscFunctionReturn(0); 1474 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1475 PetscFunctionReturn(0); 1476 } 1477 } else if (isbinary) { 1478 if (size == 1) { 1479 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1480 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1481 } else { 1482 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } else if (isdraw) { 1486 PetscDraw draw; 1487 PetscBool isnull; 1488 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1489 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1490 } 1491 1492 if (size == 1) { 1493 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1494 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1495 } else { 1496 /* assemble the entire matrix onto first processor. 
1497 Mat A;
1498 Mat_SeqAIJ *Aloc;
1499 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1500 MatScalar *a;
1501
1502 if (mat->rmap->N > 1024) {
1503 PetscBool flg = PETSC_FALSE;
1504
1505 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1506 if (!flg) {
1507 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1508 }
1509 }
1510
1511 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1512 if (!rank) {
1513 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1514 } else {
1515 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1516 }
1517 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1518 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1519 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1520 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1521 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1522
1523 /* copy over the A part */
1524 Aloc = (Mat_SeqAIJ*)aij->A->data;
1525 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1526 row = mat->rmap->rstart;
1527 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1528 for (i=0; i<m; i++) {
1529 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1530 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1531 }
1532 aj = Aloc->j;
1533 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1534
1535 /* copy over the B part */
1536 Aloc = (Mat_SeqAIJ*)aij->B->data;
1537 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538 row = mat->rmap->rstart;
1539 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1540 ct = cols;
1541 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1542 for (i=0; i<m; i++) {
1543 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1544 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1545 }
1546 ierr = PetscFree(ct);CHKERRQ(ierr);
1547 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1549 /*
1550 Everyone has to call to draw the matrix since the graphics waits are
1551 synchronized across all processors that share the PetscDraw object
1552 */
1553 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1554 if (!rank) {
1555 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1556 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1557 ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1558 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1559 }
1560 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1561 ierr = MatDestroy(&A);CHKERRQ(ierr);
1562 }
1563 PetscFunctionReturn(0);
1564 }
1565
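/*
   Usage sketch for the viewer paths handled above (A is any assembled MPIAIJ matrix; error
   checking abbreviated).  Full ASCII output is refused beyond 1024 rows unless
   -mat_ascii_output_large is given, so for large matrices prefer the info formats or a
   binary viewer:

      ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
      ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);   // per-process nonzero counts and I-node statistics
      ierr = MatView(A,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);     // nonzero pattern in an X window
*/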
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatView_MPIAIJ"
1568 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1569 {
1570 PetscErrorCode ierr;
1571 PetscBool iascii,isdraw,issocket,isbinary;
1572
1573 PetscFunctionBegin;
1574 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1575 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1576 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1577 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1578 if (iascii || isdraw || isbinary || issocket) {
1579 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1580 } else {
1581 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1582 }
1583 PetscFunctionReturn(0);
1584 }
1585
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatSOR_MPIAIJ"
1588 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1589 {
1590 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1591 PetscErrorCode ierr;
1592 Vec bb1 = 0;
1593 PetscBool hasop;
1594
1595 PetscFunctionBegin;
1596 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1597 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1598 }
1599
1600 if (flag == SOR_APPLY_UPPER) {
1601 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1602 PetscFunctionReturn(0);
1603 }
1604
1605 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1606 if (flag & SOR_ZERO_INITIAL_GUESS) {
1607 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1608 its--;
1609 }
1610
1611 while (its--) {
1612 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1614
1615 /* update rhs: bb1 = bb - B*x */
1616 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1617 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1618
1619 /* local sweep */
1620 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1621 }
1622 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1623 if (flag & SOR_ZERO_INITIAL_GUESS) {
1624 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1625 its--;
1626 }
1627 while (its--) {
1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630
1631 /* update rhs: bb1 = bb - B*x */
1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634
1635 /* local sweep */
1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637 }
1638 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1639 if (flag & SOR_ZERO_INITIAL_GUESS) {
1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641 its--;
1642 }
1643 while (its--) {
1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646
1647 /* update rhs: bb1 = bb - B*x */
1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650
1651 /* local sweep */
1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653 }
1654 } else if (flag & SOR_EISENSTAT) {
1655 Vec xx1;
1656
1657 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1659
1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 if (!mat->diag) {
1663 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1664 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1665 }
1666 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1667 if (hasop) {
1668 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1669 } else {
1670 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1671 }
1672 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1673
1674 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1675
1676 /* local sweep */
1677 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1678 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1679 ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1680 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1681
1682 ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1683 PetscFunctionReturn(0);
1684 }
1685
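/*
   Every SOR variant above smooths only with the on-process diagonal block mat->A: each
   outer iteration scatters the current solution, folds the off-process coupling into the
   right-hand side (bb1 = bb - B*x), and then runs the sequential SOR kernel locally; a
   true global parallel SOR is rejected.  Typical call through the public interface (one
   local symmetric sweep with zero initial guess; omega = 1.0, fshift = 0.0):

      ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/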
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatPermute_MPIAIJ"
1688 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1689 {
1690 MPI_Comm comm;
1691 PetscInt first,local_rowsize,local_colsize;
1692 const PetscInt *rows;
1693 IS crowp,growp,irowp,lrowp,lcolp,icolp;
1694 PetscErrorCode ierr;
1695
1696 PetscFunctionBegin;
1697 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1698 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1699 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1700 /* collect the global row permutation and invert it */
1701 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1702 ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1703 ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1704 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1705 ierr = ISDestroy(&growp);CHKERRQ(ierr);
1706 /* get the local target indices */
1707 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1708 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1709 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1710 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1711 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1712 ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1713 /* the column permutation is so much easier;
1714 make a local version of 'colp' and invert it */
1715 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1716 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1717 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1718 ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1719 /* now we just get the submatrix */
1720 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1721 /* clean up */
1722 ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1723 ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1724 PetscFunctionReturn(0);
1725 }
1726
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1729 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1730 {
1731 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1732 Mat A = mat->A,B = mat->B;
1733 PetscErrorCode ierr;
1734 PetscReal isend[5],irecv[5];
1735
1736 PetscFunctionBegin;
1737 info->block_size = 1.0;
1738 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1739 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1740 isend[3] = info->memory; isend[4] = info->mallocs;
1741 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1742 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1743 isend[3] += info->memory; isend[4] += info->mallocs;
1744 if (flag == MAT_LOCAL) {
1745 info->nz_used = isend[0];
1746 info->nz_allocated = isend[1];
1747 info->nz_unneeded = isend[2];
1748 info->memory = isend[3];
1749 info->mallocs = isend[4];
1750 } else if (flag == MAT_GLOBAL_MAX) {
1751 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1752 info->nz_used = irecv[0];
1753 info->nz_allocated = irecv[1];
1754 info->nz_unneeded = irecv[2];
1755 info->memory = irecv[3];
1756 info->mallocs = irecv[4];
1757 } else if (flag == MAT_GLOBAL_SUM) {
1758 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1759 info->nz_used = irecv[0];
1760 info->nz_allocated = irecv[1];
1761 info->nz_unneeded = irecv[2];
1762 info->memory = irecv[3];
1763 info->mallocs = irecv[4];
1764 }
1765 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1766 info->fill_ratio_needed = 0;
1767 info->factor_mallocs = 0;
1768
1769 PetscFunctionReturn(0);
1770 }
1771
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatSetOption_MPIAIJ"
1774 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1775 {
1776 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1777 PetscErrorCode ierr;
1778
1779 PetscFunctionBegin;
1780 switch (op) {
1781 case MAT_NEW_NONZERO_LOCATIONS:
1782 case MAT_NEW_NONZERO_ALLOCATION_ERR:
1783 case MAT_UNUSED_NONZERO_LOCATION_ERR:
1784 case MAT_KEEP_NONZERO_PATTERN:
1785 case MAT_NEW_NONZERO_LOCATION_ERR:
1786 case MAT_USE_INODES:
1787 case MAT_IGNORE_ZERO_ENTRIES:
1788 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1789 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1790 break;
1791 case MAT_ROW_ORIENTED:
1792 a->roworiented = flg;
1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1795 break;
1796 case MAT_NEW_DIAGONALS:
1797 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1798 break;
1799 case MAT_IGNORE_OFF_PROC_ENTRIES:
1800 a->donotstash = flg;
1801 break;
1802 case MAT_SPD:
1803 A->spd_set = PETSC_TRUE;
1804 A->spd = flg;
1805 if (flg) {
1806 A->symmetric = PETSC_TRUE;
1807 A->structurally_symmetric = PETSC_TRUE;
1808 A->symmetric_set = PETSC_TRUE;
1809 A->structurally_symmetric_set = PETSC_TRUE;
1810 }
1811 break;
1812 case MAT_SYMMETRIC:
1813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814 break;
1815 case MAT_STRUCTURALLY_SYMMETRIC:
1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817 break;
1818 case MAT_HERMITIAN:
1819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820 break;
1821 case MAT_SYMMETRY_ETERNAL:
1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823 break;
1824 default:
1825 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1826 }
1827 PetscFunctionReturn(0);
1828 }
1829
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatGetRow_MPIAIJ"
1832 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1833 {
1834 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1835
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1836 PetscErrorCode ierr; 1837 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1838 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1839 PetscInt *cmap,*idx_p; 1840 1841 PetscFunctionBegin; 1842 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1843 mat->getrowactive = PETSC_TRUE; 1844 1845 if (!mat->rowvalues && (idx || v)) { 1846 /* 1847 allocate enough space to hold information from the longest row. 1848 */ 1849 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1850 PetscInt max = 1,tmp; 1851 for (i=0; i<matin->rmap->n; i++) { 1852 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1853 if (max < tmp) { max = tmp; } 1854 } 1855 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1856 } 1857 1858 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1859 lrow = row - rstart; 1860 1861 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1862 if (!v) {pvA = 0; pvB = 0;} 1863 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1864 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1865 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1866 nztot = nzA + nzB; 1867 1868 cmap = mat->garray; 1869 if (v || idx) { 1870 if (nztot) { 1871 /* Sort by increasing column numbers, assuming A and B already sorted */ 1872 PetscInt imark = -1; 1873 if (v) { 1874 *v = v_p = mat->rowvalues; 1875 for (i=0; i<nzB; i++) { 1876 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1877 else break; 1878 } 1879 imark = i; 1880 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1881 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1882 } 1883 if (idx) { 1884 *idx = idx_p = mat->rowindices; 1885 if (imark > -1) { 1886 for (i=0; i<imark; i++) { 1887 idx_p[i] = cmap[cworkB[i]]; 1888 } 1889 } else { 1890 for (i=0; i<nzB; i++) { 1891 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1892 else break; 1893 } 1894 imark = i; 1895 } 1896 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1897 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1898 } 1899 } else { 1900 if (idx) *idx = 0; 1901 if (v) *v = 0; 1902 } 1903 } 1904 *nz = nztot; 1905 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1906 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1912 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1913 { 1914 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1915 1916 PetscFunctionBegin; 1917 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1918 aij->getrowactive = PETSC_FALSE; 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatNorm_MPIAIJ" 1924 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1925 { 1926 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1927 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1928 PetscErrorCode ierr; 1929 PetscInt i,j,cstart = mat->cmap->rstart; 1930 PetscReal sum = 0.0; 1931 MatScalar *v; 1932 1933 PetscFunctionBegin; 1934 if (aij->size == 1) { 1935 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1936 } else { 1937 if (type == 
NORM_FROBENIUS) { 1938 v = amat->a; 1939 for (i=0; i<amat->nz; i++) { 1940 #if defined(PETSC_USE_COMPLEX) 1941 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1942 #else 1943 sum += (*v)*(*v); v++; 1944 #endif 1945 } 1946 v = bmat->a; 1947 for (i=0; i<bmat->nz; i++) { 1948 #if defined(PETSC_USE_COMPLEX) 1949 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1950 #else 1951 sum += (*v)*(*v); v++; 1952 #endif 1953 } 1954 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1955 *norm = PetscSqrtReal(*norm); 1956 } else if (type == NORM_1) { /* max column norm */ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1960 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1961 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1962 *norm = 0.0; 1963 v = amat->a; jj = amat->j; 1964 for (j=0; j<amat->nz; j++) { 1965 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1966 } 1967 v = bmat->a; jj = bmat->j; 1968 for (j=0; j<bmat->nz; j++) { 1969 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1970 } 1971 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1972 for (j=0; j<mat->cmap->N; j++) { 1973 if (tmp2[j] > *norm) *norm = tmp2[j]; 1974 } 1975 ierr = PetscFree(tmp);CHKERRQ(ierr); 1976 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1992 } else { 1993 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 } 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatTranspose_MPIAIJ" 2001 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2002 { 2003 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2004 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2005 PetscErrorCode ierr; 2006 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2007 PetscInt cstart=A->cmap->rstart,ncol; 2008 Mat B; 2009 MatScalar *array; 2010 2011 PetscFunctionBegin; 2012 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2013 2014 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2015 ai = Aloc->i; aj = Aloc->j; 2016 bi = Bloc->i; bj = Bloc->j; 2017 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2018 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2019 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2020 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2021 for (i=0; i<ai[ma]; i++){ 2022 d_nnz[aj[i]] ++; 2023 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2024 } 2025 2026 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2027 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2028 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2029 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2030 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2031 } else { 2032 B = *matout; 2033 } 2034 2035 /* copy over the A part */ 2036 array = Aloc->a; 2037 row = A->rmap->rstart; 2038 for (i=0; i<ma; i++) { 2039 ncol = ai[i+1]-ai[i]; 2040 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2041 row++; array += ncol; aj += ncol; 2042 } 2043 aj = Aloc->j; 2044 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2045 2046 /* copy over the B part */ 2047 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2048 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2049 array = Bloc->a; 2050 row = A->rmap->rstart; 2051 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2052 cols_tmp = cols; 2053 for (i=0; i<mb; i++) { 2054 ncol = bi[i+1]-bi[i]; 2055 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2056 row++; array += ncol; cols_tmp += ncol; 2057 } 2058 ierr = PetscFree(cols);CHKERRQ(ierr); 2059 2060 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2061 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2062 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2063 *matout = B; 2064 } else { 2065 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2066 } 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2072 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2073 { 2074 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2075 Mat a = aij->A,b = aij->B; 2076 PetscErrorCode ierr; 2077 PetscInt s1,s2,s3; 2078 2079 PetscFunctionBegin; 2080 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2081 if (rr) { 2082 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2083 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2084 /* Overlap communication with computation. 
*/ 2085 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2086 } 2087 if (ll) { 2088 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2089 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2090 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2091 } 2092 /* scale the diagonal block */ 2093 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2094 2095 if (rr) { 2096 /* Do a scatter end and then right scale the off-diagonal block */ 2097 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2098 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2099 } 2100 2101 PetscFunctionReturn(0); 2102 } 2103 2104 #undef __FUNCT__ 2105 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2106 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2107 { 2108 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2109 PetscErrorCode ierr; 2110 2111 PetscFunctionBegin; 2112 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2113 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2114 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2115 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2116 PetscFunctionReturn(0); 2117 } 2118 #undef __FUNCT__ 2119 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2120 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2121 { 2122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2123 PetscErrorCode ierr; 2124 2125 PetscFunctionBegin; 2126 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2127 PetscFunctionReturn(0); 2128 } 2129 2130 #undef __FUNCT__ 2131 #define __FUNCT__ "MatEqual_MPIAIJ" 2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2133 { 2134 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2135 Mat a,b,c,d; 2136 PetscBool flg; 2137 PetscErrorCode ierr; 2138 2139 PetscFunctionBegin; 2140 a = matA->A; b = matA->B; 2141 c = matB->A; d = matB->B; 2142 2143 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2144 if (flg) { 2145 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2146 } 2147 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatCopy_MPIAIJ" 2153 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2154 { 2155 PetscErrorCode ierr; 2156 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2157 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2158 2159 PetscFunctionBegin; 2160 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2161 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2162 /* because of the column compression in the off-processor part of the matrix a->B, 2163 the number of columns in a->B and b->B may be different, hence we cannot call 2164 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2165 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2166 then copying the submatrices */ 2167 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2168 } else { 2169 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2170 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2171 } 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2177 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2178 { 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "MatAXPY_MPIAIJ" 2188 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2189 { 2190 PetscErrorCode ierr; 2191 PetscInt i; 2192 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2193 PetscBLASInt bnz,one=1; 2194 Mat_SeqAIJ *x,*y; 2195 2196 PetscFunctionBegin; 2197 if (str == SAME_NONZERO_PATTERN) { 2198 PetscScalar alpha = a; 2199 x = (Mat_SeqAIJ *)xx->A->data; 2200 y = (Mat_SeqAIJ *)yy->A->data; 2201 bnz = PetscBLASIntCast(x->nz); 2202 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2203 x = (Mat_SeqAIJ *)xx->B->data; 2204 y = (Mat_SeqAIJ *)yy->B->data; 2205 bnz = PetscBLASIntCast(x->nz); 2206 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2207 } else if (str == SUBSET_NONZERO_PATTERN) { 2208 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2209 2210 x = (Mat_SeqAIJ *)xx->B->data; 2211 y = (Mat_SeqAIJ *)yy->B->data; 2212 if (y->xtoy && y->XtoY != xx->B) { 2213 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2214 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2215 } 2216 if (!y->xtoy) { /* get xtoy */ 2217 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2218 y->XtoY = xx->B; 2219 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2220 } 2221 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2222 } else { 2223 Mat B; 2224 PetscInt *nnz_d,*nnz_o; 2225 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2226 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2227 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2228 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2229 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2230 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2231 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2232 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2233 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2234 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2235 ierr = MatHeaderReplace(Y,B); 2236 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2237 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2238 } 2239 PetscFunctionReturn(0); 2240 } 2241 2242 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2243 2244 #undef __FUNCT__ 2245 #define __FUNCT__ "MatConjugate_MPIAIJ" 2246 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2247 { 2248 #if defined(PETSC_USE_COMPLEX) 2249 PetscErrorCode ierr; 2250 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2251 2252 PetscFunctionBegin; 2253 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2254 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2255 #else 2256 PetscFunctionBegin; 2257 #endif 2258 PetscFunctionReturn(0); 2259 } 2260 2261 
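/*
   The three MatAXPY branches above trade generality for speed: SAME_NONZERO_PATTERN is two
   raw BLASaxpy calls on the stored arrays, SUBSET_NONZERO_PATTERN scatters X's entries into
   Y through the cached xtoy map, and anything else builds a fresh matrix with the union
   pattern and replaces Y's innards.  Usage sketch (X and Y are conforming parallel AIJ
   matrices; the flag is a promise about their nonzero structures that PETSc does not
   verify):

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);   // Y = Y + 2*X
*/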
#undef __FUNCT__
2262 #define __FUNCT__ "MatRealPart_MPIAIJ"
2263 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2264 {
2265 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2266 PetscErrorCode ierr;
2267
2268 PetscFunctionBegin;
2269 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2270 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2271 PetscFunctionReturn(0);
2272 }
2273
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2276 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2277 {
2278 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2279 PetscErrorCode ierr;
2280
2281 PetscFunctionBegin;
2282 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2283 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2284 PetscFunctionReturn(0);
2285 }
2286
2287 #ifdef PETSC_HAVE_PBGL
2288
2289 #include <boost/parallel/mpi/bsp_process_group.hpp>
2290 #include <boost/graph/distributed/ilu_default_graph.hpp>
2291 #include <boost/graph/distributed/ilu_0_block.hpp>
2292 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2293 #include <boost/graph/distributed/petsc/interface.hpp>
2294 #include <boost/multi_array.hpp>
2295 #include <boost/parallel/distributed_property_map.hpp>
2296
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2299 /*
2300 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2301 */
2302 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2303 {
2304 namespace petsc = boost::distributed::petsc;
2305
2306 namespace graph_dist = boost::graph::distributed;
2307 using boost::graph::distributed::ilu_default::process_group_type;
2308 using boost::graph::ilu_permuted;
2309
2310 PetscBool row_identity, col_identity;
2311 PetscContainer c;
2312 PetscInt m, n, M, N;
2313 PetscErrorCode ierr;
2314
2315 PetscFunctionBegin;
2316 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2317 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2318 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2319 if (!row_identity || !col_identity) {
2320 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2321 }
2322
2323 process_group_type pg;
2324 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2325 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2326 lgraph_type& level_graph = *lgraph_p;
2327 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2328
2329 petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2330 ilu_permuted(level_graph);
2331
2332 /* put together the new matrix */
2333 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2334 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2335 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2336 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2337 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2338 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340
2341 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2342 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2343 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2344 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2345 PetscFunctionReturn(0);
2346 }
2347
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2350 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2351 {
2352 PetscFunctionBegin;
2353 PetscFunctionReturn(0);
2354 }
2355
2356 #undef __FUNCT__
2357 #define __FUNCT__ "MatSolve_MPIAIJ"
2358 /*
2359 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2360 */
2361 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2362 {
2363 namespace graph_dist = boost::graph::distributed;
2364
2365 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2366 lgraph_type* lgraph_p;
2367 PetscContainer c;
2368 PetscErrorCode ierr;
2369
2370 PetscFunctionBegin;
2371 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2372 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2373 ierr = VecCopy(b, x);CHKERRQ(ierr);
2374
2375 PetscScalar* array_x;
2376 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2377 PetscInt sx;
2378 ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2379
2380 PetscScalar* array_b;
2381 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2382 PetscInt sb;
2383 ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2384
2385 lgraph_type& level_graph = *lgraph_p;
2386 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2387
2388 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2389 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2390 ref_x(array_x, boost::extents[num_vertices(graph)]);
2391
2392 typedef boost::iterator_property_map<array_ref_type::iterator,
2393 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2394 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2395 vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2396
2397 ilu_set_solve(*lgraph_p, vector_b, vector_x);
2398 ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); /* restore the arrays obtained above */
2399 PetscFunctionReturn(0);
2400 }
2401 #endif
2402
2403 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2404 PetscInt nzlocal,nsends,nrecvs;
2405 PetscMPIInt *send_rank,*recv_rank;
2406 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2407 PetscScalar *sbuf_a,**rbuf_a;
2408 PetscErrorCode (*Destroy)(Mat);
2409 } Mat_Redundant;
2410
2411 #undef __FUNCT__
2412 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2413 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2414 {
2415 PetscErrorCode ierr;
2416 Mat_Redundant *redund=(Mat_Redundant*)ptr;
2417 PetscInt i;
2418
2419 PetscFunctionBegin;
2420 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2421 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2422 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2423 for (i=0; i<redund->nrecvs; i++){
2424 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2425 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2426 }
2427 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2428 ierr = PetscFree(redund);CHKERRQ(ierr);
2429 PetscFunctionReturn(0);
2430 }
2431
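/*
   Mat_Redundant caches the send/receive buffers of a first MatGetRedundantMatrix() call so
   that a MAT_REUSE_MATRIX call can skip all of the setup communication.  The cache travels
   with the created matrix inside a PetscContainer.  The general idiom, sketched here with
   placeholder names (MyData, MyDataDestroy, obj):

      PetscContainer container;
      MyData         *data;
      ierr = PetscNew(MyData,&data);CHKERRQ(ierr);
      ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
      ierr = PetscContainerSetPointer(container,data);CHKERRQ(ierr);
      ierr = PetscContainerSetUserDestroy(container,MyDataDestroy);CHKERRQ(ierr);
      ierr = PetscObjectCompose(obj,"MyData",(PetscObject)container);CHKERRQ(ierr);
      ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);   // obj now keeps the data alive

   where MyDataDestroy(void*) frees whatever the pointer owns, exactly as
   PetscContainerDestroy_MatRedundant() above does for Mat_Redundant.
*/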
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatDestroy_MatRedundant"
2434 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2435 {
2436 PetscErrorCode ierr;
2437 PetscContainer container;
2438 Mat_Redundant *redund=PETSC_NULL;
2439
2440 PetscFunctionBegin;
2441 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2442 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2443 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2444 A->ops->destroy = redund->Destroy;
2445 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2446 if (A->ops->destroy) {
2447 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2448 }
2449 PetscFunctionReturn(0);
2450 }
2451
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2454 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2455 {
2456 PetscMPIInt rank,size;
2457 MPI_Comm comm=((PetscObject)mat)->comm;
2458 PetscErrorCode ierr;
2459 PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2460 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2461 PetscInt *rowrange=mat->rmap->range;
2462 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2463 Mat A=aij->A,B=aij->B,C=*matredundant;
2464 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2465 PetscScalar *sbuf_a;
2466 PetscInt nzlocal=a->nz+b->nz;
2467 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2468 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2469 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2470 MatScalar *aworkA,*aworkB;
2471 PetscScalar *vals;
2472 PetscMPIInt tag1,tag2,tag3,imdex;
2473 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2474 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2475 MPI_Status recv_status,*send_status;
2476 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2477 PetscInt **rbuf_j=PETSC_NULL;
2478 PetscScalar **rbuf_a=PETSC_NULL;
2479 Mat_Redundant *redund=PETSC_NULL;
2480 PetscContainer container;
2481
2482 PetscFunctionBegin;
2483 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2484 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2485
2486 if (reuse == MAT_REUSE_MATRIX) {
2487 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2488 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2489 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2490 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2491 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2492 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2493 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2494 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2495
2496 nsends = redund->nsends;
2497 nrecvs = redund->nrecvs;
2498 send_rank = redund->send_rank;
2499 recv_rank = redund->recv_rank;
2500 sbuf_nz = redund->sbuf_nz;
2501 rbuf_nz = redund->rbuf_nz;
2502 sbuf_j = redund->sbuf_j;
2503 sbuf_a = redund->sbuf_a;
2504 rbuf_j = redund->rbuf_j;
2505 rbuf_a = redund->rbuf_a;
2506 }
2507
2508 if (reuse == MAT_INITIAL_MATRIX){
2509 PetscMPIInt subrank,subsize;
2510 PetscInt nleftover,np_subcomm;
2511 /* get the destination processors' id send_rank, nsends and nrecvs */
2512 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2513 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2514 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2515 np_subcomm = size/nsubcomm;
2516 nleftover = size - nsubcomm*np_subcomm;
2517 nsends = 0; nrecvs = 0;
2518 for (i=0; i<size; i++){ /* i=rank*/
2519 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2520 send_rank[nsends] = i; nsends++;
2521 recv_rank[nrecvs++] = i;
2522 }
2523 }
2524 if (rank >= size - nleftover){/* this proc is a leftover processor */
2525 i = size-nleftover-1;
2526 j = 0;
2527 while (j < nsubcomm - nleftover){
2528 send_rank[nsends++] = i;
2529 i--; j++;
2530 }
2531 }
2532
2533 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2534 for (i=0; i<nleftover; i++){
2535 recv_rank[nrecvs++] = size-nleftover+i;
2536 }
2537 }
2538
2539 /* allocate sbuf_j, sbuf_a */
2540 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2541 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2542 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2543 } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2544
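/*
   Layout of the send buffers packed below, for m = rend-rstart local rows and
   nzlocal = a->nz + b->nz local entries:

      sbuf_j = [ rptr[0] .. rptr[m] | global columns of row 0 | columns of row 1 | ... ]
      sbuf_a = [ values of row 0 | values of row 1 | ... ]

   Each row is emitted with its off-diagonal columns below cstart first, then the diagonal
   block (shifted by cstart), then the off-diagonal columns at or beyond cend, so every
   row's column indices come out sorted.
*/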
Wrong nzlocal"); 2495 2496 nsends = redund->nsends; 2497 nrecvs = redund->nrecvs; 2498 send_rank = redund->send_rank; 2499 recv_rank = redund->recv_rank; 2500 sbuf_nz = redund->sbuf_nz; 2501 rbuf_nz = redund->rbuf_nz; 2502 sbuf_j = redund->sbuf_j; 2503 sbuf_a = redund->sbuf_a; 2504 rbuf_j = redund->rbuf_j; 2505 rbuf_a = redund->rbuf_a; 2506 } 2507 2508 if (reuse == MAT_INITIAL_MATRIX){ 2509 PetscMPIInt subrank,subsize; 2510 PetscInt nleftover,np_subcomm; 2511 /* get the destination processors' id send_rank, nsends and nrecvs */ 2512 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2513 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2514 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2515 np_subcomm = size/nsubcomm; 2516 nleftover = size - nsubcomm*np_subcomm; 2517 nsends = 0; nrecvs = 0; 2518 for (i=0; i<size; i++){ /* i=rank*/ 2519 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2520 send_rank[nsends] = i; nsends++; 2521 recv_rank[nrecvs++] = i; 2522 } 2523 } 2524 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2525 i = size-nleftover-1; 2526 j = 0; 2527 while (j < nsubcomm - nleftover){ 2528 send_rank[nsends++] = i; 2529 i--; j++; 2530 } 2531 } 2532 2533 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2534 for (i=0; i<nleftover; i++){ 2535 recv_rank[nrecvs++] = size-nleftover+i; 2536 } 2537 } 2538 2539 /* allocate sbuf_j, sbuf_a */ 2540 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2541 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2542 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2543 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2544 2545 /* copy mat's local entries into the buffers */ 2546 if (reuse == MAT_INITIAL_MATRIX){ 2547 rownz_max = 0; 2548 rptr = sbuf_j; 2549 cols = sbuf_j + rend-rstart + 1; 2550 vals = sbuf_a; 2551 rptr[0] = 0; 2552 for (i=0; i<rend-rstart; i++){ 2553 row = i + rstart; 2554 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2555 ncols = nzA + nzB; 2556 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2557 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2558 /* load the column indices for this row into cols */ 2559 lwrite = 0; 2560 for (l=0; l<nzB; l++) { 2561 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2562 vals[lwrite] = aworkB[l]; 2563 cols[lwrite++] = ctmp; 2564 } 2565 } 2566 for (l=0; l<nzA; l++){ 2567 vals[lwrite] = aworkA[l]; 2568 cols[lwrite++] = cstart + cworkA[l]; 2569 } 2570 for (l=0; l<nzB; l++) { 2571 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2572 vals[lwrite] = aworkB[l]; 2573 cols[lwrite++] = ctmp; 2574 } 2575 } 2576 vals += ncols; 2577 cols += ncols; 2578 rptr[i+1] = rptr[i] + ncols; 2579 if (rownz_max < ncols) rownz_max = ncols; 2580 } 2581 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2582 } else { /* only copy matrix values into sbuf_a */ 2583 rptr = sbuf_j; 2584 vals = sbuf_a; 2585 rptr[0] = 0; 2586 for (i=0; i<rend-rstart; i++){ 2587 row = i + rstart; 2588 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2589 ncols = nzA + nzB; 2590 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2591 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2592 lwrite = 0; 2593 for (l=0; l<nzB; l++) { 2594 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2595 } 2596 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2597 for 
(l=0; l<nzB; l++) { 2598 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2599 } 2600 vals += ncols; 2601 rptr[i+1] = rptr[i] + ncols; 2602 } 2603 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2604 2605 /* send nzlocal to others, and recv other's nzlocal */ 2606 /*--------------------------------------------------*/ 2607 if (reuse == MAT_INITIAL_MATRIX){ 2608 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2609 s_waits2 = s_waits3 + nsends; 2610 s_waits1 = s_waits2 + nsends; 2611 r_waits1 = s_waits1 + nsends; 2612 r_waits2 = r_waits1 + nrecvs; 2613 r_waits3 = r_waits2 + nrecvs; 2614 } else { 2615 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2616 r_waits3 = s_waits3 + nsends; 2617 } 2618 2619 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2620 if (reuse == MAT_INITIAL_MATRIX){ 2621 /* get new tags to keep the communication clean */ 2622 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2623 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2624 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2625 2626 /* post receives of other's nzlocal */ 2627 for (i=0; i<nrecvs; i++){ 2628 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2629 } 2630 /* send nzlocal to others */ 2631 for (i=0; i<nsends; i++){ 2632 sbuf_nz[i] = nzlocal; 2633 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2634 } 2635 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2636 count = nrecvs; 2637 while (count) { 2638 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2639 recv_rank[imdex] = recv_status.MPI_SOURCE; 2640 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2641 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2642 2643 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2644 rbuf_nz[imdex] += i + 2; 2645 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2646 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2647 count--; 2648 } 2649 /* wait on sends of nzlocal */ 2650 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2651 /* send mat->i,j to others, and recv from other's */ 2652 /*------------------------------------------------*/ 2653 for (i=0; i<nsends; i++){ 2654 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2655 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2656 } 2657 /* wait on receives of mat->i,j */ 2658 /*------------------------------*/ 2659 count = nrecvs; 2660 while (count) { 2661 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2662 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2663 count--; 2664 } 2665 /* wait on sends of mat->i,j */ 2666 /*---------------------------*/ 2667 if (nsends) { 2668 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2669 } 2670 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2671 2672 /* post receives, send and receive mat->a */ 2673 
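/*
   The receives below complete through MPI_Waitany(), i.e. in whatever order the messages
   arrive; recv_rank[imdex], recorded from recv_status.MPI_SOURCE during the first phase,
   is used to verify that slot imdex still belongs to the expected sender.  The skeleton:

      count = nrecvs;
      while (count--) {
        ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
        // ... rbuf_a[imdex] is now ready to be consumed ...
      }
*/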
/*----------------------------------------*/ 2674 for (imdex=0; imdex<nrecvs; imdex++) { 2675 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2676 } 2677 for (i=0; i<nsends; i++){ 2678 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2679 } 2680 count = nrecvs; 2681 while (count) { 2682 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2683 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2684 count--; 2685 } 2686 if (nsends) { 2687 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2688 } 2689 2690 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2691 2692 /* create redundant matrix */ 2693 /*-------------------------*/ 2694 if (reuse == MAT_INITIAL_MATRIX){ 2695 /* compute rownz_max for preallocation */ 2696 for (imdex=0; imdex<nrecvs; imdex++){ 2697 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2698 rptr = rbuf_j[imdex]; 2699 for (i=0; i<j; i++){ 2700 ncols = rptr[i+1] - rptr[i]; 2701 if (rownz_max < ncols) rownz_max = ncols; 2702 } 2703 } 2704 2705 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2706 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2707 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2708 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2709 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2710 } else { 2711 C = *matredundant; 2712 } 2713 2714 /* insert local matrix entries */ 2715 rptr = sbuf_j; 2716 cols = sbuf_j + rend-rstart + 1; 2717 vals = sbuf_a; 2718 for (i=0; i<rend-rstart; i++){ 2719 row = i + rstart; 2720 ncols = rptr[i+1] - rptr[i]; 2721 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2722 vals += ncols; 2723 cols += ncols; 2724 } 2725 /* insert received matrix entries */ 2726 for (imdex=0; imdex<nrecvs; imdex++){ 2727 rstart = rowrange[recv_rank[imdex]]; 2728 rend = rowrange[recv_rank[imdex]+1]; 2729 rptr = rbuf_j[imdex]; 2730 cols = rbuf_j[imdex] + rend-rstart + 1; 2731 vals = rbuf_a[imdex]; 2732 for (i=0; i<rend-rstart; i++){ 2733 row = i + rstart; 2734 ncols = rptr[i+1] - rptr[i]; 2735 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2736 vals += ncols; 2737 cols += ncols; 2738 } 2739 } 2740 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2741 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2742 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2743 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2744 if (reuse == MAT_INITIAL_MATRIX) { 2745 PetscContainer container; 2746 *matredundant = C; 2747 /* create a supporting struct and attach it to C for reuse */ 2748 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2749 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2750 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2751 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2752 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2753 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2754 2755 redund->nzlocal = nzlocal; 2756 redund->nsends = nsends; 2757 redund->nrecvs = nrecvs; 2758 
redund->send_rank = send_rank;
2759 redund->recv_rank = recv_rank;
2760 redund->sbuf_nz = sbuf_nz;
2761 redund->rbuf_nz = rbuf_nz;
2762 redund->sbuf_j = sbuf_j;
2763 redund->sbuf_a = sbuf_a;
2764 redund->rbuf_j = rbuf_j;
2765 redund->rbuf_a = rbuf_a;
2766
2767 redund->Destroy = C->ops->destroy;
2768 C->ops->destroy = MatDestroy_MatRedundant;
2769 }
2770 PetscFunctionReturn(0);
2771 }
2772
2773 #undef __FUNCT__
2774 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2775 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2776 {
2777 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2778 PetscErrorCode ierr;
2779 PetscInt i,*idxb = 0;
2780 PetscScalar *va,*vb;
2781 Vec vtmp;
2782
2783 PetscFunctionBegin;
2784 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2785 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2786 if (idx) {
2787 for (i=0; i<A->rmap->n; i++) {
2788 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2789 }
2790 }
2791
2792 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2793 if (idx) {
2794 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2795 }
2796 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2797 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2798
2799 for (i=0; i<A->rmap->n; i++){
2800 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2801 va[i] = vb[i];
2802 if (idx) idx[i] = a->garray[idxb[i]];
2803 }
2804 }
2805
2806 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2807 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2808 ierr = PetscFree(idxb);CHKERRQ(ierr);
2809 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2810 PetscFunctionReturn(0);
2811 }
2812
2813 #undef __FUNCT__
2814 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2815 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2816 {
2817 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2818 PetscErrorCode ierr;
2819 PetscInt i,*idxb = 0;
2820 PetscScalar *va,*vb;
2821 Vec vtmp;
2822
2823 PetscFunctionBegin;
2824 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2825 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2826 if (idx) {
2827 for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, as in MatGetRowMaxAbs_MPIAIJ() above; A->cmap->n was a bug */
2828 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2829 }
2830 }
2831
2832 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2833 if (idx) {
2834 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2835 }
2836 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2837 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2838
2839 for (i=0; i<A->rmap->n; i++){
2840 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2841 va[i] = vb[i];
2842 if (idx) idx[i] = a->garray[idxb[i]];
2843 }
2844 }
2845
2846 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2847 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2848 ierr = PetscFree(idxb);CHKERRQ(ierr);
2849 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2850 PetscFunctionReturn(0);
2851 }
2852
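/*
   Usage sketch for the row max/min kernels above (A is an assembled parallel AIJ matrix;
   v must conform to A's row layout; idx may be PETSC_NULL when the column locations are
   not wanted):

      Vec      rmax;
      PetscInt m,*loc;
      ierr = MatGetVecs(A,PETSC_NULL,&rmax);CHKERRQ(ierr);   // vector over the rows of A
      ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
      ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);      // |largest| entry of each row and its global column
      ierr = PetscFree(loc);CHKERRQ(ierr);
      ierr = VecDestroy(&rmax);CHKERRQ(ierr);
*/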
2853 #undef __FUNCT__
2854 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2855 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2856 {
2857 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2858 PetscInt n = A->rmap->n;
2859 PetscInt cstart = A->cmap->rstart;
2860 PetscInt *cmap = mat->garray;
2861 PetscInt *diagIdx, *offdiagIdx;
2862 Vec diagV, offdiagV;
2863 PetscScalar *a, *diagA, *offdiagA;
2864 PetscInt r;
2865 PetscErrorCode ierr;
2866
2867 PetscFunctionBegin;
2868 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2869 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* sequential work vectors must live on a one-process communicator */
2870 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2871 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2872 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2873 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2874 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2875 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2876 for(r = 0; r < n; ++r) {
2877 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2878 a[r] = diagA[r];
2879 idx[r] = cstart + diagIdx[r];
2880 } else {
2881 a[r] = offdiagA[r];
2882 idx[r] = cmap[offdiagIdx[r]];
2883 }
2884 }
2885 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2886 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2887 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2888 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2889 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2890 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2891 PetscFunctionReturn(0);
2892 }
2893
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2896 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2897 {
2898 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2899 PetscInt n = A->rmap->n;
2900 PetscInt cstart = A->cmap->rstart;
2901 PetscInt *cmap = mat->garray;
2902 PetscInt *diagIdx, *offdiagIdx;
2903 Vec diagV, offdiagV;
2904 PetscScalar *a, *diagA, *offdiagA;
2905 PetscInt r;
2906 PetscErrorCode ierr;
2907
2908 PetscFunctionBegin;
2909 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2910 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2911 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2912 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2913 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2914 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2915 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2916 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2917 for(r = 0; r < n; ++r) {
2918 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2919 a[r] = diagA[r];
2920 idx[r] = cstart + diagIdx[r];
2921 } else {
2922 a[r] = offdiagA[r];
2923 idx[r] = cmap[offdiagIdx[r]];
2924 }
2925 }
2926 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2927 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2928 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2929 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2930 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2931 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2932 PetscFunctionReturn(0);
2933 }
2934
2935 #undef __FUNCT__
2936 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2937 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2938 {
2939 PetscErrorCode ierr;
2940 Mat *dummy;
2941
2942 PetscFunctionBegin;
2943 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2944 *newmat = *dummy;
2945 ierr = PetscFree(dummy);CHKERRQ(ierr);
2946 PetscFunctionReturn(0);
2947 }
2948
2949 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2950
2951 #undef __FUNCT__
2952 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2953 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2954 {
2955 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2956 PetscErrorCode ierr;
2957
2958 PetscFunctionBegin;
2959 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2960 PetscFunctionReturn(0);
2961 }
2962
2963
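/*
   The table below is PETSc's type dispatch for MPIAIJ: it binds the generic MatXXX()
   front ends to the kernels defined in this file, in the slot order fixed by struct
   _MatOps; a 0 entry means the operation is unsupported for this type.  Schematically,
   each front end just forwards through the table (simplified sketch; the real MatMult()
   also validates arguments and logs events):

      PetscErrorCode MatMult(Mat mat,Vec x,Vec y)
      {
        PetscErrorCode ierr;
        PetscFunctionBegin;
        if (!mat->ops->mult) SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Not supported for matrix type %s",((PetscObject)mat)->type_name);
        ierr = (*mat->ops->mult)(mat,x,y);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
*/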
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2966 MatGetRow_MPIAIJ, 2967 MatRestoreRow_MPIAIJ, 2968 MatMult_MPIAIJ, 2969 /* 4*/ MatMultAdd_MPIAIJ, 2970 MatMultTranspose_MPIAIJ, 2971 MatMultTransposeAdd_MPIAIJ, 2972 #ifdef PETSC_HAVE_PBGL 2973 MatSolve_MPIAIJ, 2974 #else 2975 0, 2976 #endif 2977 0, 2978 0, 2979 /*10*/ 0, 2980 0, 2981 0, 2982 MatSOR_MPIAIJ, 2983 MatTranspose_MPIAIJ, 2984 /*15*/ MatGetInfo_MPIAIJ, 2985 MatEqual_MPIAIJ, 2986 MatGetDiagonal_MPIAIJ, 2987 MatDiagonalScale_MPIAIJ, 2988 MatNorm_MPIAIJ, 2989 /*20*/ MatAssemblyBegin_MPIAIJ, 2990 MatAssemblyEnd_MPIAIJ, 2991 MatSetOption_MPIAIJ, 2992 MatZeroEntries_MPIAIJ, 2993 /*24*/ MatZeroRows_MPIAIJ, 2994 0, 2995 #ifdef PETSC_HAVE_PBGL 2996 0, 2997 #else 2998 0, 2999 #endif 3000 0, 3001 0, 3002 /*29*/ MatSetUpPreallocation_MPIAIJ, 3003 #ifdef PETSC_HAVE_PBGL 3004 0, 3005 #else 3006 0, 3007 #endif 3008 0, 3009 0, 3010 0, 3011 /*34*/ MatDuplicate_MPIAIJ, 3012 0, 3013 0, 3014 0, 3015 0, 3016 /*39*/ MatAXPY_MPIAIJ, 3017 MatGetSubMatrices_MPIAIJ, 3018 MatIncreaseOverlap_MPIAIJ, 3019 MatGetValues_MPIAIJ, 3020 MatCopy_MPIAIJ, 3021 /*44*/ MatGetRowMax_MPIAIJ, 3022 MatScale_MPIAIJ, 3023 0, 3024 0, 3025 MatZeroRowsColumns_MPIAIJ, 3026 /*49*/ MatSetBlockSize_MPIAIJ, 3027 0, 3028 0, 3029 0, 3030 0, 3031 /*54*/ MatFDColoringCreate_MPIAIJ, 3032 0, 3033 MatSetUnfactored_MPIAIJ, 3034 MatPermute_MPIAIJ, 3035 0, 3036 /*59*/ MatGetSubMatrix_MPIAIJ, 3037 MatDestroy_MPIAIJ, 3038 MatView_MPIAIJ, 3039 0, 3040 0, 3041 /*64*/ 0, 3042 0, 3043 0, 3044 0, 3045 0, 3046 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3047 MatGetRowMinAbs_MPIAIJ, 3048 0, 3049 MatSetColoring_MPIAIJ, 3050 #if defined(PETSC_HAVE_ADIC) 3051 MatSetValuesAdic_MPIAIJ, 3052 #else 3053 0, 3054 #endif 3055 MatSetValuesAdifor_MPIAIJ, 3056 /*75*/ MatFDColoringApply_AIJ, 3057 0, 3058 0, 3059 0, 3060 0, 3061 /*80*/ 0, 3062 0, 3063 0, 3064 /*83*/ MatLoad_MPIAIJ, 3065 0, 3066 0, 3067 0, 3068 0, 3069 0, 3070 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3071 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3072 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3073 MatPtAP_Basic, 3074 MatPtAPSymbolic_MPIAIJ, 3075 /*94*/ MatPtAPNumeric_MPIAIJ, 3076 0, 3077 0, 3078 0, 3079 0, 3080 /*99*/ 0, 3081 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3082 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3083 MatConjugate_MPIAIJ, 3084 0, 3085 /*104*/MatSetValuesRow_MPIAIJ, 3086 MatRealPart_MPIAIJ, 3087 MatImaginaryPart_MPIAIJ, 3088 0, 3089 0, 3090 /*109*/0, 3091 MatGetRedundantMatrix_MPIAIJ, 3092 MatGetRowMin_MPIAIJ, 3093 0, 3094 0, 3095 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3096 0, 3097 0, 3098 0, 3099 0, 3100 /*119*/0, 3101 0, 3102 0, 3103 0, 3104 MatGetMultiProcBlock_MPIAIJ, 3105 /*124*/MatFindNonZeroRows_MPIAIJ, 3106 MatGetColumnNorms_MPIAIJ, 3107 MatInvertBlockDiagonal_MPIAIJ, 3108 0, 3109 MatGetSubMatricesParallel_MPIAIJ, 3110 /*129*/0, 3111 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3112 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3113 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3114 0, 3115 /*134*/0, 3116 0, 3117 0, 3118 0, 3119 0 3120 }; 3121 3122 /* ----------------------------------------------------------------------------------------*/ 3123 3124 EXTERN_C_BEGIN 3125 #undef __FUNCT__ 3126 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3127 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3128 { 3129 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3130 PetscErrorCode ierr; 3131 3132 PetscFunctionBegin; 3133 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3134 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3135 PetscFunctionReturn(0); 3136 } 3137 EXTERN_C_END 3138 3139 EXTERN_C_BEGIN 3140 #undef __FUNCT__ 3141 
#define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3142 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3143 { 3144 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3145 PetscErrorCode ierr; 3146 3147 PetscFunctionBegin; 3148 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3149 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3150 PetscFunctionReturn(0); 3151 } 3152 EXTERN_C_END 3153 3154 EXTERN_C_BEGIN 3155 #undef __FUNCT__ 3156 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3157 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3158 { 3159 Mat_MPIAIJ *b; 3160 PetscErrorCode ierr; 3161 PetscInt i; 3162 3163 PetscFunctionBegin; 3164 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3165 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3166 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3167 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3168 3169 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3170 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3171 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3172 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3173 if (d_nnz) { 3174 for (i=0; i<B->rmap->n; i++) { 3175 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3176 } 3177 } 3178 if (o_nnz) { 3179 for (i=0; i<B->rmap->n; i++) { 3180 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3181 } 3182 } 3183 b = (Mat_MPIAIJ*)B->data; 3184 3185 if (!B->preallocated) { 3186 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3187 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3188 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3189 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3190 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3191 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3192 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3193 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3194 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3195 } 3196 3197 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3198 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3199 B->preallocated = PETSC_TRUE; 3200 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3201 PetscFunctionReturn(0); 3202 } 3203 EXTERN_C_END 3204 3205 #undef __FUNCT__ 3206 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3207 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3208 { 3209 Mat mat; 3210 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3211 PetscErrorCode ierr; 3212 3213 PetscFunctionBegin; 3214 *newmat = 0; 3215 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3216 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3217 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3218 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3219 a = (Mat_MPIAIJ*)mat->data; 3220 3221 mat->factortype = matin->factortype; 3222 mat->rmap->bs = matin->rmap->bs; 3223 mat->assembled = PETSC_TRUE; 3224 mat->insertmode = NOT_SET_VALUES; 3225 mat->preallocated = PETSC_TRUE; 3226 3227 a->size = oldmat->size; 3228 a->rank = oldmat->rank; 3229 a->donotstash = oldmat->donotstash; 3230 a->roworiented = oldmat->roworiented; 3231 a->rowindices = 0; 3232 a->rowvalues = 0; 3233 a->getrowactive = PETSC_FALSE; 3234 3235 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3236 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3237 3238 if (oldmat->colmap) { 3239 #if defined (PETSC_USE_CTABLE) 3240 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3241 #else 3242 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3243 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3244 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3245 #endif 3246 } else a->colmap = 0; 3247 if (oldmat->garray) { 3248 PetscInt len; 3249 len = oldmat->B->cmap->n; 3250 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3251 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3252 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3253 } else a->garray = 0; 3254 3255 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3256 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3257 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3258 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3259 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3260 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3261 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3262 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3263 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3264 *newmat = mat; 3265 
PetscFunctionReturn(0);
3266 }
3267
3268
3269
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatLoad_MPIAIJ"
3272 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3273 {
3274   PetscScalar    *vals,*svals;
3275   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3276   PetscErrorCode ierr;
3277   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3278   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3279   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3280   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3281   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3282   int            fd;
3283
3284   PetscFunctionBegin;
3285   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3286   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3287   if (!rank) {
3288     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3289     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3290     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3291   }
3292
3293   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3294
3295   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3296   M = header[1]; N = header[2];
3297   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3298   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3299   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3300
3301   /* If global sizes are set, check that they are consistent with those given in the file */
3302   if (sizesset) {
3303     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3304   }
3305   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3306   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3307
3308   /* determine ownership of all rows */
3309   if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3310   else m = newMat->rmap->n; /* Set by user */
3311
3312   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3313   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3314
3315   /* First process needs enough room for process with most rows */
3316   if (!rank) {
3317     mmax = rowners[1];
3318     for (i=2; i<=size; i++) {
3319       mmax = PetscMax(mmax,rowners[i]);
3320     }
3321   } else mmax = m;
3322
3323   rowners[0] = 0;
3324   for (i=2; i<=size; i++) {
3325     rowners[i] += rowners[i-1];
3326   }
3327   rstart = rowners[rank];
3328   rend   = rowners[rank+1];
3329
3330   /* distribute row lengths to all processors */
3331   ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3332   if (!rank) {
3333     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3334     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3335     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3336     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3337     for (j=0; j<m; j++) {
3338       procsnz[0] += ourlens[j];
3339     }
3340     for (i=1; i<size; i++) {
3341       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3342       /* calculate the number of nonzeros on each processor */
3343       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3344         procsnz[i] += rowlengths[j];
3345       }
3346       ierr =
MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3347 } 3348 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3349 } else { 3350 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3351 } 3352 3353 if (!rank) { 3354 /* determine max buffer needed and allocate it */ 3355 maxnz = 0; 3356 for (i=0; i<size; i++) { 3357 maxnz = PetscMax(maxnz,procsnz[i]); 3358 } 3359 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3360 3361 /* read in my part of the matrix column indices */ 3362 nz = procsnz[0]; 3363 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3364 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3365 3366 /* read in every one elses and ship off */ 3367 for (i=1; i<size; i++) { 3368 nz = procsnz[i]; 3369 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3370 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3371 } 3372 ierr = PetscFree(cols);CHKERRQ(ierr); 3373 } else { 3374 /* determine buffer space needed for message */ 3375 nz = 0; 3376 for (i=0; i<m; i++) { 3377 nz += ourlens[i]; 3378 } 3379 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3380 3381 /* receive message of column indices*/ 3382 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3383 } 3384 3385 /* determine column ownership if matrix is not square */ 3386 if (N != M) { 3387 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3388 else n = newMat->cmap->n; 3389 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3390 cstart = cend - n; 3391 } else { 3392 cstart = rstart; 3393 cend = rend; 3394 n = cend - cstart; 3395 } 3396 3397 /* loop over local rows, determining number of off diagonal entries */ 3398 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3399 jj = 0; 3400 for (i=0; i<m; i++) { 3401 for (j=0; j<ourlens[i]; j++) { 3402 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3403 jj++; 3404 } 3405 } 3406 3407 for (i=0; i<m; i++) { 3408 ourlens[i] -= offlens[i]; 3409 } 3410 if (!sizesset) { 3411 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3412 } 3413 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3414 3415 for (i=0; i<m; i++) { 3416 ourlens[i] += offlens[i]; 3417 } 3418 3419 if (!rank) { 3420 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3421 3422 /* read in my part of the matrix numerical values */ 3423 nz = procsnz[0]; 3424 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3425 3426 /* insert into matrix */ 3427 jj = rstart; 3428 smycols = mycols; 3429 svals = vals; 3430 for (i=0; i<m; i++) { 3431 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3432 smycols += ourlens[i]; 3433 svals += ourlens[i]; 3434 jj++; 3435 } 3436 3437 /* read in other processors and ship out */ 3438 for (i=1; i<size; i++) { 3439 nz = procsnz[i]; 3440 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3441 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3442 } 3443 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3444 } else { 3445 /* receive numeric values */ 3446 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3447 3448 /* receive message of values*/ 3449 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3450 3451 /* insert into matrix */ 3452 jj = rstart; 3453 smycols = mycols; 3454 svals = vals; 3455 for (i=0; i<m; i++) { 3456 ierr = 
MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3457 smycols += ourlens[i]; 3458 svals += ourlens[i]; 3459 jj++; 3460 } 3461 } 3462 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3463 ierr = PetscFree(vals);CHKERRQ(ierr); 3464 ierr = PetscFree(mycols);CHKERRQ(ierr); 3465 ierr = PetscFree(rowners);CHKERRQ(ierr); 3466 3467 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3468 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3469 PetscFunctionReturn(0); 3470 } 3471 3472 #undef __FUNCT__ 3473 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3474 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3475 { 3476 PetscErrorCode ierr; 3477 IS iscol_local; 3478 PetscInt csize; 3479 3480 PetscFunctionBegin; 3481 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3482 if (call == MAT_REUSE_MATRIX) { 3483 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3484 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3485 } else { 3486 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3487 } 3488 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3489 if (call == MAT_INITIAL_MATRIX) { 3490 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3491 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3492 } 3493 PetscFunctionReturn(0); 3494 } 3495 3496 #undef __FUNCT__ 3497 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3498 /* 3499 Not great since it makes two copies of the submatrix, first an SeqAIJ 3500 in local and then by concatenating the local matrices the end result. 3501 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3502 3503 Note: This requires a sequential iscol with all indices. 
3504 */ 3505 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3506 { 3507 PetscErrorCode ierr; 3508 PetscMPIInt rank,size; 3509 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3510 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3511 Mat *local,M,Mreuse; 3512 MatScalar *vwork,*aa; 3513 MPI_Comm comm = ((PetscObject)mat)->comm; 3514 Mat_SeqAIJ *aij; 3515 3516 3517 PetscFunctionBegin; 3518 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3519 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3520 3521 if (call == MAT_REUSE_MATRIX) { 3522 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3523 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3524 local = &Mreuse; 3525 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3526 } else { 3527 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3528 Mreuse = *local; 3529 ierr = PetscFree(local);CHKERRQ(ierr); 3530 } 3531 3532 /* 3533 m - number of local rows 3534 n - number of columns (same on all processors) 3535 rstart - first row in new global matrix generated 3536 */ 3537 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3538 if (call == MAT_INITIAL_MATRIX) { 3539 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3540 ii = aij->i; 3541 jj = aij->j; 3542 3543 /* 3544 Determine the number of non-zeros in the diagonal and off-diagonal 3545 portions of the matrix in order to do correct preallocation 3546 */ 3547 3548 /* first get start and end of "diagonal" columns */ 3549 if (csize == PETSC_DECIDE) { 3550 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3551 if (mglobal == n) { /* square matrix */ 3552 nlocal = m; 3553 } else { 3554 nlocal = n/size + ((n % size) > rank); 3555 } 3556 } else { 3557 nlocal = csize; 3558 } 3559 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3560 rstart = rend - nlocal; 3561 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3562 3563 /* next, compute all the lengths */ 3564 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3565 olens = dlens + m; 3566 for (i=0; i<m; i++) { 3567 jend = ii[i+1] - ii[i]; 3568 olen = 0; 3569 dlen = 0; 3570 for (j=0; j<jend; j++) { 3571 if (*jj < rstart || *jj >= rend) olen++; 3572 else dlen++; 3573 jj++; 3574 } 3575 olens[i] = olen; 3576 dlens[i] = dlen; 3577 } 3578 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3579 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3580 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3581 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3582 ierr = PetscFree(dlens);CHKERRQ(ierr); 3583 } else { 3584 PetscInt ml,nl; 3585 3586 M = *newmat; 3587 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3588 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3589 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3590 /* 3591 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3592 rather than the slower MatSetValues(). 
3593     */
3594     M->was_assembled = PETSC_TRUE;
3595     M->assembled     = PETSC_FALSE;
3596   }
3597   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3598   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3599   ii  = aij->i;
3600   jj  = aij->j;
3601   aa  = aij->a;
3602   for (i=0; i<m; i++) {
3603     row = rstart + i;
3604     nz  = ii[i+1] - ii[i];
3605     cwork = jj;     jj += nz;
3606     vwork = aa;     aa += nz;
3607     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3608   }
3609
3610   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3612   *newmat = M;
3613
3614   /* save submatrix used in processor for next request */
3615   if (call == MAT_INITIAL_MATRIX) {
3616     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3617     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3618   }
3619
3620   PetscFunctionReturn(0);
3621 }
3622
3623 EXTERN_C_BEGIN
3624 #undef __FUNCT__
3625 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3626 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3627 {
3628   PetscInt       m,cstart,cend,j,nnz,i,d;
3629   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3630   const PetscInt *JJ;
3631   PetscScalar    *values;
3632   PetscErrorCode ierr;
3633
3634   PetscFunctionBegin;
3635   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3636
3637   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3638   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3639   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3640   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3641   m      = B->rmap->n;
3642   cstart = B->cmap->rstart;
3643   cend   = B->cmap->rend;
3644   rstart = B->rmap->rstart;
3645
3646   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3647
3648 #if defined(PETSC_USE_DEBUG)
3649   for (i=0; i<m; i++) {
3650     nnz = Ii[i+1] - Ii[i];
3651     JJ  = J + Ii[i];
3652     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3653     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3654     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3655   }
3656 #endif
3657
3658   for (i=0; i<m; i++) {
3659     nnz     = Ii[i+1] - Ii[i];
3660     JJ      = J + Ii[i];
3661     nnz_max = PetscMax(nnz_max,nnz);
3662     d       = 0;
3663     for (j=0; j<nnz; j++) {
3664       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3665     }
3666     d_nnz[i] = d;
3667     o_nnz[i] = nnz - d;
3668   }
3669   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3670   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3671
3672   if (v) values = (PetscScalar*)v;
3673   else {
3674     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3675     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3676   }
3677
3678   for (i=0; i<m; i++) {
3679     ii   = i + rstart;
3680     nnz  = Ii[i+1] - Ii[i];
3681     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3682   }
3683   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3684   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3685
3686   if (!v) {
3687     ierr = PetscFree(values);CHKERRQ(ierr);
3688   }
3689   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3690   PetscFunctionReturn(0);
3691 }
3692 EXTERN_C_END
3693
3694 #undef __FUNCT__
3695 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3696 /*@
3697    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3698    (the default parallel PETSc format).
3699
3700    Collective on MPI_Comm
3701
3702    Input Parameters:
3703 +  B - the matrix
3704 .  i - the indices into j for the start of each local row (starts with zero)
3705 .  j - the column indices for each local row (starts with zero)
3706 -  v - optional values in the matrix
3707
3708    Level: developer
3709
3710    Notes:
3711    The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3712    thus you CANNOT change the matrix entries by changing the values of v[] after you have
3713    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3714
3715    The i and j indices are 0-based, and the i indices index into the local j array.
3716
3717    The format used for the sparse matrix input is equivalent to a
3718    row-major ordering, i.e., for the following matrix, the input data expected is
3719    as shown:
3720
3721         1 0 0
3722         2 0 3     P0
3723        -------
3724         4 5 6     P1
3725
3726      Process0 [P0]: rows_owned=[0,1]
3727         i = {0,1,3}  [size = nrow+1 = 2+1]
3728         j = {0,0,2}  [size = nz = 3]
3729         v = {1,2,3}  [size = nz = 3]
3730
3731      Process1 [P1]: rows_owned=[2]
3732         i = {0,3}    [size = nrow+1 = 1+1]
3733         j = {0,1,2}  [size = nz = 3]
3734         v = {4,5,6}  [size = nz = 3]
3735
3736 .keywords: matrix, aij, compressed row, sparse, parallel
3737
3738 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3739           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3740 @*/
3741 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3742 {
3743   PetscErrorCode ierr;
3744
3745   PetscFunctionBegin;
3746   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3747   PetscFunctionReturn(0);
3748 }
3749
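/*
   A minimal usage sketch of MatMPIAIJSetPreallocationCSR() for the 3x3 example in the
   manual page above, as seen from process 0 of two processes; the variable names are
   illustrative only and are not part of this file.

      Mat         B;
      PetscInt    i[] = {0,1,3};          local row 0 has 1 entry, local row 1 has 2
      PetscInt    j[] = {0,0,2};          global column indices
      PetscScalar v[] = {1.0,2.0,3.0};

      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetSizes(B,2,PETSC_DECIDE,3,3);       process 0 owns rows 0 and 1
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);   copies i,j,v and assembles B
*/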
3750 #undef __FUNCT__
3751 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3752 /*@C
3753    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3754    (the default parallel PETSc format).  For good matrix assembly performance
3755    the user should preallocate the matrix storage by setting the parameters
3756    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3757    performance can be increased by more than a factor of 50.
3758
3759    Collective on MPI_Comm
3760
3761    Input Parameters:
3762 +  A - the matrix
3763 .  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3764           (same value is used for all local rows)
3765 .  d_nnz - array containing the number of nonzeros in the various rows of the
3766            DIAGONAL portion of the local submatrix (possibly different for each row)
3767            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3768            The size of this array is equal to the number of local rows, i.e. 'm'.
3769            You must leave room for the diagonal entry even if it is zero.
3770 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3771           submatrix (same value is used for all local rows).
3772 -  o_nnz - array containing the number of nonzeros in the various rows of the
3773            OFF-DIAGONAL portion of the local submatrix (possibly different for
3774            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3775            structure. The size of this array is equal to the number
3776            of local rows, i.e. 'm'.
3777
3778    If the *_nnz parameter is given then the *_nz parameter is ignored.
3779
3780    The AIJ format (also called the Yale sparse matrix format or
3781    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3782    storage.  The stored row and column indices begin with zero.
3783    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3784
3785    The parallel matrix is partitioned such that the first m0 rows belong to
3786    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3787    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3788
3789    The DIAGONAL portion of the local submatrix of a processor can be defined
3790    as the submatrix which is obtained by extracting the part corresponding to
3791    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3792    first row that belongs to the processor, r2 is the last row belonging to
3793    this processor, and c1-c2 is the range of indices of the local part of a
3794    vector suitable for applying the matrix to. This is an mxn matrix. In the
3795    common case of a square matrix, the row and column ranges are the same and
3796    the DIAGONAL part is also square. The remaining portion of the local
3797    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3798
3799    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3800
3801    You can call MatGetInfo() to get information on how effective the preallocation was;
3802    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
3803    You can also run with the option -info and look for messages with the string
3804    malloc in them to see if additional memory allocation was needed.
3805
3806    Example usage:
3807
3808    Consider the following 8x8 matrix with 34 non-zero values, that is
3809    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3810    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3811    as follows:
3812
3813 .vb
3814             1  2  0  |  0  3  0  |  0  4
3815     Proc0   0  5  6  |  7  0  0  |  8  0
3816             9  0 10  | 11  0  0  | 12  0
3817     -------------------------------------
3818            13  0 14  | 15 16 17  |  0  0
3819     Proc1   0 18  0  | 19 20 21  |  0  0
3820             0  0  0  | 22 23  0  | 24  0
3821     -------------------------------------
3822     Proc2  25 26 27  |  0  0 28  | 29  0
3823            30  0  0  | 31 32 33  |  0 34
3824 .ve
3825
3826    This can be represented as a collection of submatrices as:
3827
3828 .vb
3829       A B C
3830       D E F
3831       G H I
3832 .ve
3833
3834    Where the submatrices A,B,C are owned by proc0, D,E,F are
3835    owned by proc1, G,H,I are owned by proc2.
3836
3837    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3838    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3839    The 'M','N' parameters are 8,8, and have the same values on all procs.
3840
3841    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3842    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3843    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3844    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3845    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3846    matrix, and [DF] as another SeqAIJ matrix.
3847
3848    When the d_nz, o_nz parameters are specified, d_nz storage elements are
3849    allocated for every row of the local diagonal submatrix, and o_nz
3850    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3851    One way to choose d_nz and o_nz is to use the max nonzeros per local
3852    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3853    In this case, the values of d_nz,o_nz are:
3854 .vb
3855      proc0 : d_nz = 2, o_nz = 2
3856      proc1 : d_nz = 3, o_nz = 2
3857      proc2 : d_nz = 1, o_nz = 4
3858 .ve
3859    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3860    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3861    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3862    34 values.
3863
3864    When the d_nnz, o_nnz parameters are specified, the storage is specified
3865    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3866    In the above case the values for d_nnz,o_nnz are:
3867 .vb
3868      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3869      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3870      proc2: d_nnz = [1,1] and o_nnz = [4,4]
3871 .ve
3872    Here the space allocated is the sum of all the above values, i.e. 34, and
3873    hence pre-allocation is perfect.
3874
3875    Level: intermediate
3876
3877 .keywords: matrix, aij, compressed row, sparse, parallel
3878
3879 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3880           MPIAIJ, MatGetInfo()
3881 @*/
3882 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3883 {
3884   PetscErrorCode ierr;
3885
3886   PetscFunctionBegin;
3887   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3888   PetscValidType(B,1);
3889   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3890   PetscFunctionReturn(0);
3891 }
3892
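/*
   A small sketch of the preallocation call proc1 would make in the 8x8 example above,
   using the per-row counts derived there; it assumes A has already been created with
   type MATMPIAIJ and local size 3 x 3.

      PetscInt d_nnz[] = {3,3,2};     per-row nonzeros inside the diagonal block [E]
      PetscInt o_nnz[] = {2,1,1};     per-row nonzeros in the off-diagonal block [DF]

      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);   d_nz/o_nz are ignored when the arrays are given
*/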
3893 #undef __FUNCT__
3894 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3895 /*@
3896    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard
3897    CSR format the local rows.
3898
3899    Collective on MPI_Comm
3900
3901    Input Parameters:
3902 +  comm - MPI communicator
3903 .  m - number of local rows (cannot be PETSC_DECIDE)
3904 .  n - This value should be the same as the local size used in creating the
3905        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3906        it calculated if N is given).  For square matrices n is almost always m.
3907 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3908 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3909 .  i - row indices
3910 .  j - column indices
3911 -  a - matrix values
3912
3913    Output Parameter:
3914 .  mat - the matrix
3915
3916    Level: intermediate
3917
3918    Notes:
3919    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3920    thus you CANNOT change the matrix entries by changing the values of a[] after you have
3921    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3922
3923    The i and j indices are 0-based, and the i indices index into the local j array.
3924
3925    The format used for the sparse matrix input is equivalent to a
3926    row-major ordering, i.e., for the following matrix, the input data expected is
3927    as shown:
3928
3929         1 0 0
3930         2 0 3     P0
3931        -------
3932         4 5 6     P1
3933
3934      Process0 [P0]: rows_owned=[0,1]
3935         i = {0,1,3}  [size = nrow+1 = 2+1]
3936         j = {0,0,2}  [size = nz = 3]
3937         v = {1,2,3}  [size = nz = 3]
3938
3939      Process1 [P1]: rows_owned=[2]
3940         i = {0,3}    [size = nrow+1 = 1+1]
3941         j = {0,1,2}  [size = nz = 3]
3942         v = {4,5,6}  [size = nz = 3]
3943
3944 .keywords: matrix, aij, compressed row, sparse, parallel
3945
3946 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3947           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3948 @*/
3949 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3950 {
3951   PetscErrorCode ierr;
3952
3953   PetscFunctionBegin;
3954   if (i[0]) {
3955     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3956   }
3957   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3958   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3959   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3960   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3961   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3962   PetscFunctionReturn(0);
3963 }
3964
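/*
   An illustrative sketch creating the 3x3 example matrix above with
   MatCreateMPIAIJWithArrays(); shown for process 0, which owns rows 0 and 1
   (process 1 would pass m=1 and its own i,j,a arrays).

      Mat         A;
      PetscInt    i[] = {0,1,3},j[] = {0,0,2};
      PetscScalar a[] = {1.0,2.0,3.0};

      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
*/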
3965 #undef __FUNCT__
3966 #define __FUNCT__ "MatCreateMPIAIJ"
3967 /*@C
3968    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3969    (the default parallel PETSc format).  For good matrix assembly performance
3970    the user should preallocate the matrix storage by setting the parameters
3971    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3972    performance can be increased by more than a factor of 50.
3973
3974    Collective on MPI_Comm
3975
3976    Input Parameters:
3977 +  comm - MPI communicator
3978 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3979        This value should be the same as the local size used in creating the
3980        y vector for the matrix-vector product y = Ax.
3981 .  n - This value should be the same as the local size used in creating the
3982        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3983        it calculated if N is given).  For square matrices n is almost always m.
3984 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3985 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3986 .  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3987           (same value is used for all local rows)
3988 .  d_nnz - array containing the number of nonzeros in the various rows of the
3989            DIAGONAL portion of the local submatrix (possibly different for each row)
3990            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3991            The size of this array is equal to the number of local rows, i.e. 'm'.
3992            You must leave room for the diagonal entry even if it is zero.
3993 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3994           submatrix (same value is used for all local rows).
3995 -  o_nnz - array containing the number of nonzeros in the various rows of the
3996            OFF-DIAGONAL portion of the local submatrix (possibly different for
3997            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3998            structure. The size of this array is equal to the number
3999            of local rows, i.e. 'm'.
4000
4001    Output Parameter:
4002 .  A - the matrix
4003
4004    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4005    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4006    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4007
4008    Notes:
4009    If the *_nnz parameter is given then the *_nz parameter is ignored.
4010
4011    The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4012    processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4013    storage requirements for this matrix.
4014
4015    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4016    processor then it must be used on all processors that share the object for
4017    that argument.
4018
4019    The user MUST specify either the local or global matrix dimensions
4020    (possibly both).
4021
4022    The parallel matrix is partitioned across processors such that the
4023    first m0 rows belong to process 0, the next m1 rows belong to
4024    process 1, the next m2 rows belong to process 2 etc., where
4025    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4026    values corresponding to an [m x N] submatrix.
4027
4028    The columns are logically partitioned with the n0 columns belonging
4029    to the 0th partition, the next n1 columns belonging to the next
4030    partition etc., where n0,n1,n2... are the input parameter 'n'.
4031
4032    The DIAGONAL portion of the local submatrix on any given processor
4033    is the submatrix corresponding to the rows and columns m,n
4034    corresponding to the given processor, i.e. the diagonal matrix on
4035    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4036    etc. The remaining portion of the local submatrix [m x (N-n)]
4037    constitutes the OFF-DIAGONAL portion. The example below better
4038    illustrates this concept.
4039
4040    For a square global matrix we define each processor's diagonal portion
4041    to be its local rows and the corresponding columns (a square submatrix);
4042    each processor's off-diagonal portion encompasses the remainder of the
4043    local matrix (a rectangular submatrix).
4044
4045    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4046
4047    When calling this routine with a single process communicator, a matrix of
4048    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4049    type of communicator, use the construction mechanism:
4050      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4051
4052    By default, this format uses inodes (identical nodes) when possible.
4053    We search for consecutive rows with the same nonzero structure, thereby
4054    reusing matrix information to achieve increased efficiency.
4055
4056    Options Database Keys:
4057 +  -mat_no_inode  - Do not use inodes
4058 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4059 -  -mat_aij_oneindex - Internally use indexing starting at 1
4060         rather than 0.  Note that when calling MatSetValues(),
4061         the user still MUST index entries starting at 0!
4062
4063
4064    Example usage:
4065
4066    Consider the following 8x8 matrix with 34 non-zero values, that is
4067    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4068    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4069    as follows:
4070
4071 .vb
4072             1  2  0  |  0  3  0  |  0  4
4073     Proc0   0  5  6  |  7  0  0  |  8  0
4074             9  0 10  | 11  0  0  | 12  0
4075     -------------------------------------
4076            13  0 14  | 15 16 17  |  0  0
4077     Proc1   0 18  0  | 19 20 21  |  0  0
4078             0  0  0  | 22 23  0  | 24  0
4079     -------------------------------------
4080     Proc2  25 26 27  |  0  0 28  | 29  0
4081            30  0  0  | 31 32 33  |  0 34
4082 .ve
4083
4084    This can be represented as a collection of submatrices as:
4085
4086 .vb
4087       A B C
4088       D E F
4089       G H I
4090 .ve
4091
4092    Where the submatrices A,B,C are owned by proc0, D,E,F are
4093    owned by proc1, G,H,I are owned by proc2.
4094
4095    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4096    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4097    The 'M','N' parameters are 8,8, and have the same values on all procs.
4098
4099    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4100    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4101    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4102    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4103    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
4104    matrix, and [DF] as another SeqAIJ matrix.
4105
4106    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4107    allocated for every row of the local diagonal submatrix, and o_nz
4108    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4109    One way to choose d_nz and o_nz is to use the max nonzeros per local
4110    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4111    In this case, the values of d_nz,o_nz are:
4112 .vb
4113      proc0 : d_nz = 2, o_nz = 2
4114      proc1 : d_nz = 3, o_nz = 2
4115      proc2 : d_nz = 1, o_nz = 4
4116 .ve
4117    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4118    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4119    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4120    34 values.
4121
4122    When the d_nnz, o_nnz parameters are specified, the storage is specified
4123    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4124    In the above case the values for d_nnz,o_nnz are:
4125 .vb
4126      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4127      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4128      proc2: d_nnz = [1,1] and o_nnz = [4,4]
4129 .ve
4130    Here the space allocated is the sum of all the above values, i.e. 34, and
4131    hence pre-allocation is perfect.
4132 4133 Level: intermediate 4134 4135 .keywords: matrix, aij, compressed row, sparse, parallel 4136 4137 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4138 MPIAIJ, MatCreateMPIAIJWithArrays() 4139 @*/ 4140 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4141 { 4142 PetscErrorCode ierr; 4143 PetscMPIInt size; 4144 4145 PetscFunctionBegin; 4146 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4147 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4148 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4149 if (size > 1) { 4150 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4151 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4152 } else { 4153 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4154 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4155 } 4156 PetscFunctionReturn(0); 4157 } 4158 4159 #undef __FUNCT__ 4160 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4161 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4162 { 4163 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4164 4165 PetscFunctionBegin; 4166 *Ad = a->A; 4167 *Ao = a->B; 4168 *colmap = a->garray; 4169 PetscFunctionReturn(0); 4170 } 4171 4172 #undef __FUNCT__ 4173 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4174 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4175 { 4176 PetscErrorCode ierr; 4177 PetscInt i; 4178 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4179 4180 PetscFunctionBegin; 4181 if (coloring->ctype == IS_COLORING_GLOBAL) { 4182 ISColoringValue *allcolors,*colors; 4183 ISColoring ocoloring; 4184 4185 /* set coloring for diagonal portion */ 4186 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4187 4188 /* set coloring for off-diagonal portion */ 4189 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4190 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4191 for (i=0; i<a->B->cmap->n; i++) { 4192 colors[i] = allcolors[a->garray[i]]; 4193 } 4194 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4195 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4196 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4197 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4198 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4199 ISColoringValue *colors; 4200 PetscInt *larray; 4201 ISColoring ocoloring; 4202 4203 /* set coloring for diagonal portion */ 4204 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4205 for (i=0; i<a->A->cmap->n; i++) { 4206 larray[i] = i + A->cmap->rstart; 4207 } 4208 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4209 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4210 for (i=0; i<a->A->cmap->n; i++) { 4211 colors[i] = coloring->colors[larray[i]]; 4212 } 4213 ierr = PetscFree(larray);CHKERRQ(ierr); 4214 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4215 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4216 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4217 4218 /* set coloring for off-diagonal portion */ 4219 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4220 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4221 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4222 for (i=0; i<a->B->cmap->n; i++) { 4223 colors[i] = coloring->colors[larray[i]]; 4224 } 4225 ierr = PetscFree(larray);CHKERRQ(ierr); 4226 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4227 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4228 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4229 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4230 4231 PetscFunctionReturn(0); 4232 } 4233 4234 #if defined(PETSC_HAVE_ADIC) 4235 #undef __FUNCT__ 4236 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4237 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4238 { 4239 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4240 PetscErrorCode ierr; 4241 4242 PetscFunctionBegin; 4243 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4244 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4245 PetscFunctionReturn(0); 4246 } 4247 #endif 4248 4249 #undef __FUNCT__ 4250 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4251 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4252 { 4253 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4254 PetscErrorCode ierr; 4255 4256 PetscFunctionBegin; 4257 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4258 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4259 PetscFunctionReturn(0); 4260 } 4261 4262 #undef __FUNCT__ 4263 #define __FUNCT__ "MatMergeSymbolic" 4264 PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4265 { 4266 PetscErrorCode ierr; 4267 PetscInt m,N,i,rstart,nnz,*dnz,*onz; 4268 PetscInt *indx; 4269 4270 PetscFunctionBegin; 4271 /* This routine will ONLY return MPIAIJ type matrix */ 4272 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4273 if (n == PETSC_DECIDE){ 4274 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4275 } 4276 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4277 rstart -= m; 4278 4279 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4280 for (i=0;i<m;i++) { 4281 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4282 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4283 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4284 } 4285 4286 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4287 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4288 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4289 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4290 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4291 PetscFunctionReturn(0); 4292 } 4293 4294 #undef __FUNCT__ 4295 #define __FUNCT__ "MatMergeNumeric" 4296 PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4297 { 4298 PetscErrorCode ierr; 4299 PetscInt m,N,i,rstart,nnz,Ii; 4300 PetscInt *indx; 4301 PetscScalar *values; 4302 4303 PetscFunctionBegin; 4304 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4305 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4306 for (i=0;i<m;i++) { 4307 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4308 Ii = i + rstart; 4309 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4310 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4311   }
4312   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4313   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4314   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4315   PetscFunctionReturn(0);
4316 }
4317
4318 #undef __FUNCT__
4319 #define __FUNCT__ "MatMerge"
4320 /*@
4321       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
4322                  matrices from each processor
4323
4324     Collective on MPI_Comm
4325
4326    Input Parameters:
4327 +    comm - the communicator the parallel matrix will live on
4328 .    inmat - the input sequential matrix (one per process)
4329 .    n - number of local columns (or PETSC_DECIDE)
4330 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4331
4332    Output Parameter:
4333 .    outmat - the parallel matrix generated
4334
4335     Level: advanced
4336
4337    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4338           The input matrix inmat is destroyed during the merge (see MatMergeNumeric() above).
4339 @*/
4340 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4341 {
4342   PetscErrorCode ierr;
4343
4344   PetscFunctionBegin;
4345   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4346   if (scall == MAT_INITIAL_MATRIX){
4347     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4348   }
4349   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4350   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4351   PetscFunctionReturn(0);
4352 }
4353
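/*
   A usage sketch for MatMerge(); each process contributes a sequential matrix (here
   seqA, an assumed MATSEQAIJ with the same number of columns on every process) and the
   rows are stacked in rank order.  Note that seqA is destroyed by the call, since
   MatMergeNumeric() above destroys its input.

      Mat merged;
      MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);
*/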
4354 #undef __FUNCT__
4355 #define __FUNCT__ "MatFileSplit"
4356 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4357 {
4358   PetscErrorCode    ierr;
4359   PetscMPIInt       rank;
4360   PetscInt          m,N,i,rstart,nnz;
4361   size_t            len;
4362   const PetscInt    *indx;
4363   PetscViewer       out;
4364   char              *name;
4365   Mat               B;
4366   const PetscScalar *values;
4367
4368   PetscFunctionBegin;
4369   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4370   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4371   /* Should this be the type of the diagonal block of A? */
4372   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4373   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4374   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4375   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4376   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4377   for (i=0;i<m;i++) {
4378     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4379     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4380     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4381   }
4382   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4383   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4384
4385   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4386   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4387   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4388   sprintf(name,"%s.%d",outfile,rank);
4389   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4390   ierr = PetscFree(name);CHKERRQ(ierr);
4391   ierr = MatView(B,out);CHKERRQ(ierr);
4392   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4393   ierr = MatDestroy(&B);CHKERRQ(ierr);
4394   PetscFunctionReturn(0);
4395 }
4396
4397 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4398 #undef __FUNCT__
4399 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4400 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4401 {
4402   PetscErrorCode       ierr;
4403   Mat_Merge_SeqsToMPI  *merge;
4404   PetscContainer       container;
4405
4406   PetscFunctionBegin;
4407   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4408   if (container) {
4409     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4410     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4411     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4412     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4413     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4414     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4415     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4416     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4422     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4423     ierr = PetscFree(merge);CHKERRQ(ierr);
4424     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4425   }
4426   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4427   PetscFunctionReturn(0);
4428 }
4429
4430 #include <../src/mat/utils/freespace.h>
4431 #include <petscbt.h>
4432
4433 #undef __FUNCT__
4434 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4435 /*@C
4436       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
4437                  matrices from each processor
4438
4439     Collective on MPI_Comm
4440
4441    Input Parameters:
4442 +    comm - the communicator the parallel matrix will live on
4443 .    seqmat - the input sequential matrices
4444 .    m - number of local rows (or PETSC_DECIDE)
4445 .    n - number of local columns (or PETSC_DECIDE)
4446 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4447
4448    Output Parameter:
4449 .    mpimat - the parallel matrix generated
4450
4451     Level: advanced
4452
4453    Notes:
4454      The dimensions of the sequential matrix in each processor MUST be the same.
4455      The input seqmat is kept in the container "MatMergeSeqsToMPI" composed on mpimat, and will be
4456      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4457 @*/
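/*
   A sketch of the PetscObjectQuery() call mentioned in the note above; the merge data
   composed on mpimat lives in a PetscContainer keyed "MatMergeSeqsToMPI", exactly as
   queried in MatDestroy_MPIAIJ_SeqsToMPI() above and in the routine below.

      PetscContainer      container;
      Mat_Merge_SeqsToMPI *merge;

      PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
      if (container) {
        PetscContainerGetPointer(container,(void**)&merge);
      }
*/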
@*/
PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0;  /* length of buf_si[] */
  merge->nsend = 0;
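  /* For each destination [proc], len_s[proc] counts the nonzeros of the seqmat rows
     that [proc] owns, and len_si[proc] is the size of the accompanying i-structure
     message: 2*(nrows+1) integers for nrows nonempty rows (one count, nrows local
     row indices, and nrows+1 row offsets). */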
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (local)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
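  /* Each local row of the merged matrix is the union of the corresponding row of
     this process's seqmat and of any row fragments received for it: e.g. if the
     local row has columns {2,5} and one received fragment has columns {3,5}, the
     sorted linked list below yields {2,3,5} and bnzi = 3. */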
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
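  /* Mark the matrix unassembled and override its destructor so that the
     Mat_Merge_SeqsToMPI support structure attached below is freed together with
     the matrix (see MatDestroy_MPIAIJ_SeqsToMPI() above). */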
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows
           and putting them into a sequential matrix with mlocal rows and n columns, where mlocal is
           the local row count obtained with MatGetLocalSize() and n is the global column count obtained
           with MatGetSize().

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated
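   Notes: a rough usage sketch (illustrative only):
.vb
      Mat Aloc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
      ... work with Aloc ...
      ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
   With MAT_REUSE_MATRIX only the numerical values of a previously generated A_loc
   are refreshed; its nonzero structure is assumed unchanged.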
    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, global columns below the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, global columns above the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A, global columns below the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A, global columns above the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated
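   Notes: a rough usage sketch (illustrative only), extracting the local rows
   restricted to the locally nonzero columns:
.vb
      Mat Aloc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
.ve
   When row or col is PETSC_NULL the routine builds the index sets itself: all local
   rows, and the columns of the diagonal block plus the nonzero columns of the
   off-diagonal block.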
    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* global columns below the diagonal block */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;    /* columns of the diagonal block */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];  /* global columns above the diagonal block */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=PETSC_NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* rows below the locally owned block */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;    /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];  /* rows above the locally owned block */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
  svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;  /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse option");
  }
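  /* In the MAT_REUSE_MATRIX case only the a-array exchange below is performed:
     the i- and j-structures of B_oth and the message layout (sstartsj, rstartsj,
     bufa) saved by the initial call are reused unchanged. */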
  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
     MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

   Input Parameter:
.    A - The matrix in mpiaij format

   Output Parameters:
+    lvec - The local vector holding off-process values from the argument to a matrix-vector product
.    colmap - A map from global column index to local index into lvec
-    multScatter - A scatter from the argument of a matrix-vector product to lvec
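   Notes: the returned objects are owned by the matrix and must not be destroyed
   by the caller; a rough access sketch (illustrative only):
.vb
      Vec        lvec;
      VecScatter scat;
#if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
#else
      PetscInt   *colmap;
#endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
.ve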
    Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as (B'*A')' since computing A*B directly is untenable

                n               p                p
          (          )    (          )     (          )
        m (    A     )  n (    B     )  = m(    C     )
          (          )    (          )     (          )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data      = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - number of local columns; this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (cannot be PETSC_DECIDE)
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.   mat - the matrix

   Level: advanced

   Notes:
       The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based; the j column indices of the "diagonal" portion are local (0 to n-1),
       while the oj column indices of the "off-diagonal" portion are global.

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.
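       Example - an illustrative sketch: for a 4x4 matrix distributed over two processes
       (2 rows and 2 columns per process), where the first process owns the rows
.vb
          1  2  |  0  3
          0  4  |  5  0
.ve
       that process would pass m=n=2, M=N=4 and the arrays
.vb
          i  = {0,2,3};  j  = {0,1,1};  a  = {1,2,4};    diagonal block (local j)
          oi = {0,1,2};  oj = {3,2};    oa = {3,5};      off-diagonal block (global oj)
.ve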
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                              PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (n < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of columns (n) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   note ba must be refreshed before ap2 is recomputed */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          } else {
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END