1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray = 
aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
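   Usage sketch (illustrative): rank 0 holds the whole matrix as a MATSEQAIJ in gmat,
   every rank passes the number of rows m it is to own locally, and gmat is only
   dereferenced on rank 0.

      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&mat);CHKERRQ(ierr);

   With an unchanged nonzero pattern the numerical values can later be refreshed from
   rank 0 without rebuilding the structure:

      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&mat);CHKERRQ(ierr);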
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 
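    For example, if the off-diagonal block B owns global columns garray = {3,7,12}, the
    map satisfies colmap[3] = 1, colmap[7] = 2, colmap[12] = 3; entries are stored
    one-based so that a lookup result of zero means the global column has no local slot
    in B yet (callers subtract one before using the index).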
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 
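  /*
     Layout of v[] expected here (a note inferred from the copies below): v contains every
     stored value of the local row ordered by increasing global column, i.e.
     [off-diagonal entries left of the diagonal block | diagonal-block entries |
      off-diagonal entries to the right].
     The loop below counts the l leading entries that belong to B and copies them, then the
     diagonal-block entries into A, then the remainder back into B.
  */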
PetscFunctionBegin; 432 /* code only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 nonew = a->nonew; 506 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 507 } else if (in[j] < 0) continue; 508 #if defined(PETSC_USE_DEBUG) 509 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 510 #endif 511 else { 512 if (mat->was_assembled) { 513 if (!aij->colmap) { 514 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 515 } 516 #if defined (PETSC_USE_CTABLE) 517 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 518 col--; 519 #else 520 col = aij->colmap[in[j]] - 1; 521 #endif 522 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 523 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 524 col = in[j]; 525 /* Reinitialize the variables required by 
MatSetValues_SeqAIJ_B_Private() */ 526 B = aij->B; 527 b = (Mat_SeqAIJ*)B->data; 528 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 529 rp2 = bj + bi[row]; 530 ap2 = ba + bi[row]; 531 rmax2 = bimax[row]; 532 nrow2 = bilen[row]; 533 low2 = 0; 534 high2 = nrow2; 535 bm = aij->B->rmap->n; 536 ba = b->a; 537 } 538 } else col = in[j]; 539 nonew = b->nonew; 540 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 541 } 542 } 543 } else { 544 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 545 if (!aij->donotstash) { 546 mat->assembled = PETSC_FALSE; 547 if (roworiented) { 548 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 549 } else { 550 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } 552 } 553 } 554 } 555 PetscFunctionReturn(0); 556 } 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "MatGetValues_MPIAIJ" 560 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 561 { 562 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 563 PetscErrorCode ierr; 564 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 565 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 566 567 PetscFunctionBegin; 568 for (i=0; i<m; i++) { 569 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 570 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 571 if (idxm[i] >= rstart && idxm[i] < rend) { 572 row = idxm[i] - rstart; 573 for (j=0; j<n; j++) { 574 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 575 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 576 if (idxn[j] >= cstart && idxn[j] < cend){ 577 col = idxn[j] - cstart; 578 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 579 } else { 580 if (!aij->colmap) { 581 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 582 } 583 #if defined (PETSC_USE_CTABLE) 584 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 585 col --; 586 #else 587 col = aij->colmap[idxn[j]] - 1; 588 #endif 589 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 590 else { 591 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 592 } 593 } 594 } 595 } else { 596 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 597 } 598 } 599 PetscFunctionReturn(0); 600 } 601 602 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 606 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 607 { 608 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 609 PetscErrorCode ierr; 610 PetscInt nstash,reallocs; 611 InsertMode addv; 612 613 PetscFunctionBegin; 614 if (aij->donotstash || mat->nooffprocentries) { 615 PetscFunctionReturn(0); 616 } 617 618 /* make sure all processors are either in INSERTMODE or ADDMODE */ 619 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 620 if (addv == 
(ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 621 mat->insertmode = addv; /* in case this processor had no cache */ 622 623 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 624 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 625 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 #undef __FUNCT__ 630 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 631 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 632 { 633 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 634 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 635 PetscErrorCode ierr; 636 PetscMPIInt n; 637 PetscInt i,j,rstart,ncols,flg; 638 PetscInt *row,*col; 639 PetscBool other_disassembled; 640 PetscScalar *val; 641 InsertMode addv = mat->insertmode; 642 643 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 644 PetscFunctionBegin; 645 if (!aij->donotstash && !mat->nooffprocentries) { 646 while (1) { 647 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 648 if (!flg) break; 649 650 for (i=0; i<n;) { 651 /* Now identify the consecutive vals belonging to the same row */ 652 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 653 if (j < n) ncols = j-i; 654 else ncols = n-i; 655 /* Now assemble all these values with a single function call */ 656 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 657 i = j; 658 } 659 } 660 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 661 } 662 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 663 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 664 665 /* determine if any processor has disassembled, if so we must 666 also disassemble ourselfs, in order that we may reassemble. 
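     (Disassembling here means DisAssemble_MPIAIJ() rebuilds the off-diagonal block B with
     global column indices instead of the compressed garray-based local indices, so that new
     off-process nonzero locations can still be inserted; the allreduce below, an MPI_PROD
     over was_assembled, detects whether any process is no longer in the assembled state.)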
*/ 667 /* 668 if nonzero structure of submatrix B cannot change then we know that 669 no processor disassembled thus we can skip this stuff 670 */ 671 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 672 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 673 if (mat->was_assembled && !other_disassembled) { 674 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 675 } 676 } 677 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 678 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 679 } 680 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 681 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 682 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 683 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 684 685 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 686 aij->rowvalues = 0; 687 688 /* used by MatAXPY() */ 689 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 690 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 691 692 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 693 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 699 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 700 { 701 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 702 PetscErrorCode ierr; 703 704 PetscFunctionBegin; 705 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 706 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatZeroRows_MPIAIJ" 712 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 713 { 714 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 715 PetscErrorCode ierr; 716 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 717 PetscInt i,*owners = A->rmap->range; 718 PetscInt *nprocs,j,idx,nsends,row; 719 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 720 PetscInt *rvalues,count,base,slen,*source; 721 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 722 MPI_Comm comm = ((PetscObject)A)->comm; 723 MPI_Request *send_waits,*recv_waits; 724 MPI_Status recv_status,*send_status; 725 const PetscScalar *xx; 726 PetscScalar *bb; 727 #if defined(PETSC_DEBUG) 728 PetscBool found = PETSC_FALSE; 729 #endif 730 731 PetscFunctionBegin; 732 /* first count number of contributors to each processor */ 733 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 734 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 735 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 736 j = 0; 737 for (i=0; i<N; i++) { 738 if (lastidx > (idx = rows[i])) j = 0; 739 lastidx = idx; 740 for (; j<size; j++) { 741 if (idx >= owners[j] && idx < owners[j+1]) { 742 nprocs[2*j]++; 743 nprocs[2*j+1] = 1; 744 owner[i] = j; 745 #if defined(PETSC_DEBUG) 746 found = PETSC_TRUE; 747 #endif 748 break; 749 } 750 } 751 #if defined(PETSC_DEBUG) 752 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 753 found = PETSC_FALSE; 754 #endif 755 } 756 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 757 758 if (A->nooffproczerorows) { 759 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 760 nrecvs = nsends; 761 nmax = N; 762 } else 
{ 763 /* inform other processors of number of messages and max length*/ 764 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 765 } 766 767 /* post receives: */ 768 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 769 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 770 for (i=0; i<nrecvs; i++) { 771 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 772 } 773 774 /* do sends: 775 1) starts[i] gives the starting index in svalues for stuff going to 776 the ith processor 777 */ 778 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 779 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 780 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 781 starts[0] = 0; 782 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 783 for (i=0; i<N; i++) { 784 svalues[starts[owner[i]]++] = rows[i]; 785 } 786 787 starts[0] = 0; 788 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 789 count = 0; 790 for (i=0; i<size; i++) { 791 if (nprocs[2*i+1]) { 792 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 793 } 794 } 795 ierr = PetscFree(starts);CHKERRQ(ierr); 796 797 base = owners[rank]; 798 799 /* wait on receives */ 800 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 801 count = nrecvs; slen = 0; 802 while (count) { 803 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 804 /* unpack receives into our local space */ 805 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 806 source[imdex] = recv_status.MPI_SOURCE; 807 lens[imdex] = n; 808 slen += n; 809 count--; 810 } 811 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 812 813 /* move the data into the send scatter */ 814 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 815 count = 0; 816 for (i=0; i<nrecvs; i++) { 817 values = rvalues + i*nmax; 818 for (j=0; j<lens[i]; j++) { 819 lrows[count++] = values[j] - base; 820 } 821 } 822 ierr = PetscFree(rvalues);CHKERRQ(ierr); 823 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 824 ierr = PetscFree(owner);CHKERRQ(ierr); 825 ierr = PetscFree(nprocs);CHKERRQ(ierr); 826 827 /* fix right hand side if needed */ 828 if (x && b) { 829 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 830 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 831 for (i=0; i<slen; i++) { 832 bb[lrows[i]] = diag*xx[lrows[i]]; 833 } 834 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 835 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 836 } 837 /* 838 Zero the required rows. If the "diagonal block" of the matrix 839 is square and the user wishes to set the diagonal we use separate 840 code so that MatSetValues() is not called for each diagonal allocating 841 new memory, thus calling lots of mallocs and slowing things down. 
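     A typical user-level call (illustrative; rows[] holds global row numbers, owned by this
     process or not, x holds the prescribed values and b the right-hand side):

        ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);

     zeros the listed rows, puts 1.0 on their diagonals, and, because x and b are supplied,
     sets b[row] = 1.0*x[row] so that a subsequent solve reproduces the prescribed values.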
842 843 */ 844 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 845 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 846 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 847 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 848 } else if (diag != 0.0) { 849 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 850 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 851 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 852 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 853 } 854 for (i = 0; i < slen; i++) { 855 row = lrows[i] + rstart; 856 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 857 } 858 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 859 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 860 } else { 861 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 862 } 863 ierr = PetscFree(lrows);CHKERRQ(ierr); 864 865 /* wait on sends */ 866 if (nsends) { 867 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 868 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 869 ierr = PetscFree(send_status);CHKERRQ(ierr); 870 } 871 ierr = PetscFree(send_waits);CHKERRQ(ierr); 872 ierr = PetscFree(svalues);CHKERRQ(ierr); 873 PetscFunctionReturn(0); 874 } 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 878 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 879 { 880 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 881 PetscErrorCode ierr; 882 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 883 PetscInt i,*owners = A->rmap->range; 884 PetscInt *nprocs,j,idx,nsends; 885 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 886 PetscInt *rvalues,count,base,slen,*source; 887 PetscInt *lens,*lrows,*values,m; 888 MPI_Comm comm = ((PetscObject)A)->comm; 889 MPI_Request *send_waits,*recv_waits; 890 MPI_Status recv_status,*send_status; 891 const PetscScalar *xx; 892 PetscScalar *bb,*mask; 893 Vec xmask,lmask; 894 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 895 const PetscInt *aj, *ii,*ridx; 896 PetscScalar *aa; 897 #if defined(PETSC_DEBUG) 898 PetscBool found = PETSC_FALSE; 899 #endif 900 901 PetscFunctionBegin; 902 /* first count number of contributors to each processor */ 903 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 904 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 905 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 906 j = 0; 907 for (i=0; i<N; i++) { 908 if (lastidx > (idx = rows[i])) j = 0; 909 lastidx = idx; 910 for (; j<size; j++) { 911 if (idx >= owners[j] && idx < owners[j+1]) { 912 nprocs[2*j]++; 913 nprocs[2*j+1] = 1; 914 owner[i] = j; 915 #if defined(PETSC_DEBUG) 916 found = PETSC_TRUE; 917 #endif 918 break; 919 } 920 } 921 #if defined(PETSC_DEBUG) 922 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 923 found = PETSC_FALSE; 924 #endif 925 } 926 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 927 928 /* inform other processors of number of messages and max length*/ 929 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 930 931 /* post receives: */ 932 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 933 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 934 for (i=0; i<nrecvs; i++) { 935 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 936 } 937 938 /* do sends: 939 1) starts[i] gives the starting index in svalues for stuff going to 940 the ith processor 941 */ 942 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 943 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 944 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 945 starts[0] = 0; 946 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 947 for (i=0; i<N; i++) { 948 svalues[starts[owner[i]]++] = rows[i]; 949 } 950 951 starts[0] = 0; 952 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 953 count = 0; 954 for (i=0; i<size; i++) { 955 if (nprocs[2*i+1]) { 956 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 957 } 958 } 959 ierr = PetscFree(starts);CHKERRQ(ierr); 960 961 base = owners[rank]; 962 963 /* wait on receives */ 964 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 965 count = nrecvs; slen = 0; 966 while (count) { 967 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 968 /* unpack receives into our local space */ 969 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 970 source[imdex] = recv_status.MPI_SOURCE; 971 lens[imdex] = n; 972 slen += n; 973 count--; 974 } 975 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 976 977 /* move the data into the send scatter */ 978 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 979 count = 0; 980 for (i=0; i<nrecvs; i++) { 981 values = rvalues + i*nmax; 982 for (j=0; j<lens[i]; j++) { 983 lrows[count++] = values[j] - base; 984 } 985 } 986 ierr = PetscFree(rvalues);CHKERRQ(ierr); 987 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 988 ierr = PetscFree(owner);CHKERRQ(ierr); 989 ierr = PetscFree(nprocs);CHKERRQ(ierr); 990 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 991 992 /* zero diagonal part of matrix */ 993 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 994 995 /* handle off diagonal part of matrix */ 996 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 997 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 998 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 999 for (i=0; i<slen; i++) { 1000 bb[lrows[i]] = 1; 1001 } 1002 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1003 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1006 if (x) { 1007 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1010 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1011 } 1012 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1013 1014 /* remove zeroed rows of off diagonal matrix */ 1015 ii = aij->i; 1016 for (i=0; i<slen; i++) { 1017 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1018 } 1019 1020 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1021 if (aij->compressedrow.use){ 1022 m = aij->compressedrow.nrows; 1023 ii = 
aij->compressedrow.i; 1024 ridx = aij->compressedrow.rindex; 1025 for (i=0; i<m; i++){ 1026 n = ii[i+1] - ii[i]; 1027 aj = aij->j + ii[i]; 1028 aa = aij->a + ii[i]; 1029 1030 for (j=0; j<n; j++) { 1031 if (PetscAbsScalar(mask[*aj])) { 1032 if (b) bb[*ridx] -= *aa*xx[*aj]; 1033 *aa = 0.0; 1034 } 1035 aa++; 1036 aj++; 1037 } 1038 ridx++; 1039 } 1040 } else { /* do not use compressed row format */ 1041 m = l->B->rmap->n; 1042 for (i=0; i<m; i++) { 1043 n = ii[i+1] - ii[i]; 1044 aj = aij->j + ii[i]; 1045 aa = aij->a + ii[i]; 1046 for (j=0; j<n; j++) { 1047 if (PetscAbsScalar(mask[*aj])) { 1048 if (b) bb[i] -= *aa*xx[*aj]; 1049 *aa = 0.0; 1050 } 1051 aa++; 1052 aj++; 1053 } 1054 } 1055 } 1056 if (x) { 1057 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1058 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1059 } 1060 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1061 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1062 ierr = PetscFree(lrows);CHKERRQ(ierr); 1063 1064 /* wait on sends */ 1065 if (nsends) { 1066 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1067 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1068 ierr = PetscFree(send_status);CHKERRQ(ierr); 1069 } 1070 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1071 ierr = PetscFree(svalues);CHKERRQ(ierr); 1072 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatMult_MPIAIJ" 1078 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1079 { 1080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1081 PetscErrorCode ierr; 1082 PetscInt nt; 1083 1084 PetscFunctionBegin; 1085 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1086 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1087 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1089 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1096 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1097 { 1098 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1108 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1109 { 1110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 1113 PetscFunctionBegin; 1114 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1116 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1123 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1124 { 1125 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1126 PetscErrorCode ierr; 1127 PetscBool merged; 1128 1129 PetscFunctionBegin; 1130 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1131 /* do nondiagonal part */ 1132 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1133 if (!merged) { 1134 /* 
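        In the non-merged case the reverse scatter is split around the local product so that
        communication overlaps computation: lvec already holds B^T x, the contributions
        destined for other ranks, so we first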
send it on its way */ 1135 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1136 /* do local part */ 1137 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1138 /* receive remote parts: note this assumes the values are not actually */ 1139 /* added in yy until the next line, */ 1140 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1141 } else { 1142 /* do local part */ 1143 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1144 /* send it on its way */ 1145 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1146 /* values actually were received in the Begin() but we need to call this nop */ 1147 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 } 1149 PetscFunctionReturn(0); 1150 } 1151 1152 EXTERN_C_BEGIN 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1155 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1156 { 1157 MPI_Comm comm; 1158 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1159 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1160 IS Me,Notme; 1161 PetscErrorCode ierr; 1162 PetscInt M,N,first,last,*notme,i; 1163 PetscMPIInt size; 1164 1165 PetscFunctionBegin; 1166 1167 /* Easy test: symmetric diagonal block */ 1168 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1169 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1170 if (!*f) PetscFunctionReturn(0); 1171 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1172 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1173 if (size == 1) PetscFunctionReturn(0); 1174 1175 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1176 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1177 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1178 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1179 for (i=0; i<first; i++) notme[i] = i; 1180 for (i=last; i<M; i++) notme[i-last+first] = i; 1181 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1182 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1183 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1184 Aoff = Aoffs[0]; 1185 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1186 Boff = Boffs[0]; 1187 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1188 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1189 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1190 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1191 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1192 ierr = PetscFree(notme);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 EXTERN_C_END 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1199 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1200 { 1201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 /* do nondiagonal part */ 1206 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1207 /* send it on its way */ 1208 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1209 /* do local part */ 1210 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1211 /* receive remote parts */ 1212 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 
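
/*
   Usage sketch for the multiplication kernels above (illustrative user-level code; the
   vectors are assumed to be created with layouts compatible with A):

      Vec x,y,z;
      ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);         x follows the column layout, y the row layout
      ierr = VecDuplicate(y,&z);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);              y = A*x
      ierr = MatMultAdd(A,x,y,z);CHKERRQ(ierr);         z = y + A*x
      ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);     x = A^T*y

   Each product applies the diagonal block a->A to the locally owned part of the input and
   the off-diagonal block a->B to ghost values gathered into a->lvec by the a->Mvctx scatter
   (or scattered back and added, for the transpose variants).
*/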
1215 1216 /* 1217 This only works correctly for square matrices where the subblock A->A is the 1218 diagonal block 1219 */ 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1222 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1223 { 1224 PetscErrorCode ierr; 1225 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1226 1227 PetscFunctionBegin; 1228 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1229 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1230 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatScale_MPIAIJ" 1236 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1237 { 1238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1239 PetscErrorCode ierr; 1240 1241 PetscFunctionBegin; 1242 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1243 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1244 PetscFunctionReturn(0); 1245 } 1246 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatDestroy_MPIAIJ" 1249 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1250 { 1251 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1252 PetscErrorCode ierr; 1253 1254 PetscFunctionBegin; 1255 #if defined(PETSC_USE_LOG) 1256 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1257 #endif 1258 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1259 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1260 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1261 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1262 #if defined (PETSC_USE_CTABLE) 1263 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1264 #else 1265 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1266 #endif 1267 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1268 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1269 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1270 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1271 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1272 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1273 1274 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1288 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1289 { 1290 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1291 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1292 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1293 PetscErrorCode ierr; 1294 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1295 int 
fd; 1296 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1297 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1298 PetscScalar *column_values; 1299 PetscInt message_count,flowcontrolcount; 1300 1301 PetscFunctionBegin; 1302 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1303 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1304 nz = A->nz + B->nz; 1305 if (!rank) { 1306 header[0] = MAT_FILE_CLASSID; 1307 header[1] = mat->rmap->N; 1308 header[2] = mat->cmap->N; 1309 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1310 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1311 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1312 /* get largest number of rows any processor has */ 1313 rlen = mat->rmap->n; 1314 range = mat->rmap->range; 1315 for (i=1; i<size; i++) { 1316 rlen = PetscMax(rlen,range[i+1] - range[i]); 1317 } 1318 } else { 1319 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1320 rlen = mat->rmap->n; 1321 } 1322 1323 /* load up the local row counts */ 1324 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1325 for (i=0; i<mat->rmap->n; i++) { 1326 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1327 } 1328 1329 /* store the row lengths to the file */ 1330 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1331 if (!rank) { 1332 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1333 for (i=1; i<size; i++) { 1334 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1335 rlen = range[i+1] - range[i]; 1336 ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1337 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1338 } 1339 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1340 } else { 1341 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1342 ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1343 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1344 } 1345 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1346 1347 /* load up the local column indices */ 1348 nzmax = nz; /* )th processor needs space a largest processor needs */ 1349 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1350 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1351 cnt = 0; 1352 for (i=0; i<mat->rmap->n; i++) { 1353 for (j=B->i[i]; j<B->i[i+1]; j++) { 1354 if ( (col = garray[B->j[j]]) > cstart) break; 1355 column_indices[cnt++] = col; 1356 } 1357 for (k=A->i[i]; k<A->i[i+1]; k++) { 1358 column_indices[cnt++] = A->j[k] + cstart; 1359 } 1360 for (; j<B->i[i+1]; j++) { 1361 column_indices[cnt++] = garray[B->j[j]]; 1362 } 1363 } 1364 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1365 1366 /* store the column indices to the file */ 1367 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1368 if (!rank) { 1369 MPI_Status status; 1370 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1371 for (i=1; i<size; i++) { 1372 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1373 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1374 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1375 ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1376 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1377 } 1378 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1379 } else { 1380 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1381 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1382 ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1383 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1384 } 1385 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1386 1387 /* load up the local column values */ 1388 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1389 cnt = 0; 1390 for (i=0; i<mat->rmap->n; i++) { 1391 for (j=B->i[i]; j<B->i[i+1]; j++) { 1392 if ( garray[B->j[j]] > cstart) break; 1393 column_values[cnt++] = B->a[j]; 1394 } 1395 for (k=A->i[i]; k<A->i[i+1]; k++) { 1396 column_values[cnt++] = A->a[k]; 1397 } 1398 for (; j<B->i[i+1]; j++) { 1399 column_values[cnt++] = B->a[j]; 1400 } 1401 } 1402 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1403 1404 /* store the column values to the file */ 1405 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1406 if (!rank) { 1407 MPI_Status status; 1408 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1409 for (i=1; i<size; i++) { 1410 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1411 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1412 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1413 ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1414 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1415 } 1416 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1417 } else { 1418 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1419 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1420 ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1421 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1422 } 1423 ierr = PetscFree(column_values);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1429 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1430 { 1431 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1432 PetscErrorCode ierr; 1433 PetscMPIInt rank = aij->rank,size = aij->size; 1434 PetscBool isdraw,iascii,isbinary; 1435 PetscViewer sviewer; 1436 PetscViewerFormat format; 1437 1438 PetscFunctionBegin; 1439 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1440 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1441 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1442 if (iascii) { 1443 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1444 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1445 MatInfo info; 1446 PetscBool inodes; 1447 1448 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1449 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1450 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1451 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1452 if (!inodes) { 1453 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1454 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1455 } else { 1456 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1457 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1458 } 1459 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1461 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1463 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1465 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1466 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1467 PetscFunctionReturn(0); 1468 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1469 PetscInt inodecount,inodelimit,*inodes; 1470 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1471 if (inodes) { 1472 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1473 } else { 1474 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1475 } 1476 PetscFunctionReturn(0); 1477 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1478 PetscFunctionReturn(0); 1479 } 1480 } else if (isbinary) { 1481 if (size == 1) { 1482 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1483 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1484 } else { 1485 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1486 } 1487 PetscFunctionReturn(0); 1488 } else if (isdraw) { 1489 PetscDraw draw; 1490 PetscBool isnull; 1491 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1492 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1493 } 1494 1495 if (size == 1) { 1496 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1497 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1498 } else { 1499 /* assemble the entire matrix onto first processor. 
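          For ASCII, draw and socket viewers on more than one process the data is copied into
          a temporary parallel matrix whose rows all live on rank 0 (sizes M,N there and 0,0
          elsewhere); rank 0 then views its sequential diagonal block.  This is guarded by the
          1024-row limit below unless -mat_ascii_output_large is given.  A typical user-level
          call that ends up here (illustrative):

             ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

          while large matrices are better written with the binary path implemented earlier in
          this file, e.g. (viewer, comm and the file name are the caller's):

             ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
             ierr = MatView(mat,viewer);CHKERRQ(ierr);
             ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);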
*/ 1500 Mat A; 1501 Mat_SeqAIJ *Aloc; 1502 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1503 MatScalar *a; 1504 1505 if (mat->rmap->N > 1024) { 1506 PetscBool flg = PETSC_FALSE; 1507 1508 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1509 if (!flg) { 1510 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1511 } 1512 } 1513 1514 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1515 if (!rank) { 1516 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1517 } else { 1518 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1519 } 1520 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1521 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1522 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1523 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1524 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1525 1526 /* copy over the A part */ 1527 Aloc = (Mat_SeqAIJ*)aij->A->data; 1528 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1529 row = mat->rmap->rstart; 1530 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1531 for (i=0; i<m; i++) { 1532 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1533 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1534 } 1535 aj = Aloc->j; 1536 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1537 1538 /* copy over the B part */ 1539 Aloc = (Mat_SeqAIJ*)aij->B->data; 1540 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1541 row = mat->rmap->rstart; 1542 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1543 ct = cols; 1544 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1545 for (i=0; i<m; i++) { 1546 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1547 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1548 } 1549 ierr = PetscFree(ct);CHKERRQ(ierr); 1550 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1551 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1552 /* 1553 Everyone has to call to draw the matrix since the graphics waits are 1554 synchronized across all processors that share the PetscDraw object 1555 */ 1556 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1557 if (!rank) { 1558 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1559 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1560 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1561 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1562 } 1563 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1564 ierr = MatDestroy(&A);CHKERRQ(ierr); 1565 } 1566 PetscFunctionReturn(0); 1567 } 1568 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatView_MPIAIJ" 1571 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1572 { 1573 PetscErrorCode ierr; 1574 PetscBool iascii,isdraw,issocket,isbinary; 1575 1576 PetscFunctionBegin; 1577 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1578 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1579 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1580 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1581 if (iascii || isdraw || isbinary || issocket) { 1582 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1583 } else { 1584 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1585 } 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatSOR_MPIAIJ" 1591 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1592 { 1593 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1594 PetscErrorCode ierr; 1595 Vec bb1 = 0; 1596 PetscBool hasop; 1597 1598 PetscFunctionBegin; 1599 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1600 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1601 } 1602 1603 if (flag == SOR_APPLY_UPPER) { 1604 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1605 PetscFunctionReturn(0); 1606 } 1607 1608 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1609 if (flag & SOR_ZERO_INITIAL_GUESS) { 1610 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1611 its--; 1612 } 1613 1614 while (its--) { 1615 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1616 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1617 1618 /* update rhs: bb1 = bb - B*x */ 1619 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1620 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1621 1622 /* local sweep */ 1623 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1624 } 1625 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1626 if (flag & SOR_ZERO_INITIAL_GUESS) { 1627 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1628 its--; 1629 } 1630 while (its--) { 1631 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1632 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 1634 /* update rhs: bb1 = bb - B*x */ 1635 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1636 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1637 1638 /* local sweep */ 1639 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1640 } 1641 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1642 if (flag & SOR_ZERO_INITIAL_GUESS) { 1643 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1644 its--; 1645 } 1646 while (its--) { 1647 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1649 1650 /* update rhs: bb1 = bb - B*x */ 1651 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1652 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1653 1654 /* local sweep */ 1655 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1656 } 1657 } else if (flag & SOR_EISENSTAT) { 1658 Vec xx1; 1659 1660 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1661 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1662 1663 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 if (!mat->diag) { 1666 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1667 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1668 } 1669 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1670 if (hasop) { 1671 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1672 } else { 1673 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1674 } 1675 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1676 1677 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1678 1679 /* local sweep */ 1680 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1681 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1682 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1683 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1684 1685 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatPermute_MPIAIJ" 1691 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1692 { 1693 MPI_Comm comm; 1694 PetscInt first,local_rowsize,local_colsize; 1695 const PetscInt *rows; 1696 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1697 PetscErrorCode ierr; 1698 1699 PetscFunctionBegin; 1700 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1701 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1702 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1703 /* collect the global row permutation and invert it */ 1704 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1705 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1706 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1707 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1708 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1709 /* get the local target indices */ 1710 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1711 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1712 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1713 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1714 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1716 /* the column permutation is so much easier; 1717 make a local version of 'colp' and invert it */ 1718 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1719 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1720 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1721 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1722 /* now we just get the submatrix */ 1723 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1724 /* clean up */ 1725 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1726 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1732 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1733 { 1734 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1735 Mat A = mat->A,B = 
mat->B; 1736 PetscErrorCode ierr; 1737 PetscReal isend[5],irecv[5]; 1738 1739 PetscFunctionBegin; 1740 info->block_size = 1.0; 1741 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1742 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1743 isend[3] = info->memory; isend[4] = info->mallocs; 1744 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1745 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1746 isend[3] += info->memory; isend[4] += info->mallocs; 1747 if (flag == MAT_LOCAL) { 1748 info->nz_used = isend[0]; 1749 info->nz_allocated = isend[1]; 1750 info->nz_unneeded = isend[2]; 1751 info->memory = isend[3]; 1752 info->mallocs = isend[4]; 1753 } else if (flag == MAT_GLOBAL_MAX) { 1754 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1755 info->nz_used = irecv[0]; 1756 info->nz_allocated = irecv[1]; 1757 info->nz_unneeded = irecv[2]; 1758 info->memory = irecv[3]; 1759 info->mallocs = irecv[4]; 1760 } else if (flag == MAT_GLOBAL_SUM) { 1761 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1762 info->nz_used = irecv[0]; 1763 info->nz_allocated = irecv[1]; 1764 info->nz_unneeded = irecv[2]; 1765 info->memory = irecv[3]; 1766 info->mallocs = irecv[4]; 1767 } 1768 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1769 info->fill_ratio_needed = 0; 1770 info->factor_mallocs = 0; 1771 1772 PetscFunctionReturn(0); 1773 } 1774 1775 #undef __FUNCT__ 1776 #define __FUNCT__ "MatSetOption_MPIAIJ" 1777 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1778 { 1779 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1780 PetscErrorCode ierr; 1781 1782 PetscFunctionBegin; 1783 switch (op) { 1784 case MAT_NEW_NONZERO_LOCATIONS: 1785 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1786 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1787 case MAT_KEEP_NONZERO_PATTERN: 1788 case MAT_NEW_NONZERO_LOCATION_ERR: 1789 case MAT_USE_INODES: 1790 case MAT_IGNORE_ZERO_ENTRIES: 1791 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1792 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1793 break; 1794 case MAT_ROW_ORIENTED: 1795 a->roworiented = flg; 1796 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1797 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1798 break; 1799 case MAT_NEW_DIAGONALS: 1800 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1801 break; 1802 case MAT_IGNORE_OFF_PROC_ENTRIES: 1803 a->donotstash = flg; 1804 break; 1805 case MAT_SPD: 1806 A->spd_set = PETSC_TRUE; 1807 A->spd = flg; 1808 if (flg) { 1809 A->symmetric = PETSC_TRUE; 1810 A->structurally_symmetric = PETSC_TRUE; 1811 A->symmetric_set = PETSC_TRUE; 1812 A->structurally_symmetric_set = PETSC_TRUE; 1813 } 1814 break; 1815 case MAT_SYMMETRIC: 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 break; 1818 case MAT_STRUCTURALLY_SYMMETRIC: 1819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1820 break; 1821 case MAT_HERMITIAN: 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 break; 1824 case MAT_SYMMETRY_ETERNAL: 1825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1826 break; 1827 default: 1828 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1829 } 1830 PetscFunctionReturn(0); 1831 } 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatGetRow_MPIAIJ" 1835 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1836 { 1837 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1838 
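  /*
     MatGetRow_MPIAIJ() assembles one locally owned global row by merging the corresponding row of the
     diagonal block A (local column indices shifted by cmap->rstart) with the row of the off-diagonal
     block B (local column indices translated through garray), returning entries in increasing global
     column order. A minimal calling sketch (not part of this routine, declarations abbreviated) goes
     through the public interface:

         ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
         for (row=rstart; row<rend; row++) {
           const PetscInt    *cols;
           const PetscScalar *vals;
           PetscInt          ncols;
           ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
           ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         }

     Only one row may be outstanding at a time, hence the getrowactive guard below.
  */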
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1839 PetscErrorCode ierr; 1840 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1841 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1842 PetscInt *cmap,*idx_p; 1843 1844 PetscFunctionBegin; 1845 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1846 mat->getrowactive = PETSC_TRUE; 1847 1848 if (!mat->rowvalues && (idx || v)) { 1849 /* 1850 allocate enough space to hold information from the longest row. 1851 */ 1852 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1853 PetscInt max = 1,tmp; 1854 for (i=0; i<matin->rmap->n; i++) { 1855 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1856 if (max < tmp) { max = tmp; } 1857 } 1858 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1859 } 1860 1861 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1862 lrow = row - rstart; 1863 1864 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1865 if (!v) {pvA = 0; pvB = 0;} 1866 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1867 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1868 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1869 nztot = nzA + nzB; 1870 1871 cmap = mat->garray; 1872 if (v || idx) { 1873 if (nztot) { 1874 /* Sort by increasing column numbers, assuming A and B already sorted */ 1875 PetscInt imark = -1; 1876 if (v) { 1877 *v = v_p = mat->rowvalues; 1878 for (i=0; i<nzB; i++) { 1879 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1880 else break; 1881 } 1882 imark = i; 1883 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1884 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1885 } 1886 if (idx) { 1887 *idx = idx_p = mat->rowindices; 1888 if (imark > -1) { 1889 for (i=0; i<imark; i++) { 1890 idx_p[i] = cmap[cworkB[i]]; 1891 } 1892 } else { 1893 for (i=0; i<nzB; i++) { 1894 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1895 else break; 1896 } 1897 imark = i; 1898 } 1899 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1900 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1901 } 1902 } else { 1903 if (idx) *idx = 0; 1904 if (v) *v = 0; 1905 } 1906 } 1907 *nz = nztot; 1908 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1909 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1915 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1916 { 1917 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1918 1919 PetscFunctionBegin; 1920 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1921 aij->getrowactive = PETSC_FALSE; 1922 PetscFunctionReturn(0); 1923 } 1924 1925 #undef __FUNCT__ 1926 #define __FUNCT__ "MatNorm_MPIAIJ" 1927 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1928 { 1929 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1930 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1931 PetscErrorCode ierr; 1932 PetscInt i,j,cstart = mat->cmap->rstart; 1933 PetscReal sum = 0.0; 1934 MatScalar *v; 1935 1936 PetscFunctionBegin; 1937 if (aij->size == 1) { 1938 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1939 } else { 1940 if (type == 
NORM_FROBENIUS) { 1941 v = amat->a; 1942 for (i=0; i<amat->nz; i++) { 1943 #if defined(PETSC_USE_COMPLEX) 1944 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1945 #else 1946 sum += (*v)*(*v); v++; 1947 #endif 1948 } 1949 v = bmat->a; 1950 for (i=0; i<bmat->nz; i++) { 1951 #if defined(PETSC_USE_COMPLEX) 1952 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1953 #else 1954 sum += (*v)*(*v); v++; 1955 #endif 1956 } 1957 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1958 *norm = PetscSqrtReal(*norm); 1959 } else if (type == NORM_1) { /* max column norm */ 1960 PetscReal *tmp,*tmp2; 1961 PetscInt *jj,*garray = aij->garray; 1962 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1963 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1964 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1965 *norm = 0.0; 1966 v = amat->a; jj = amat->j; 1967 for (j=0; j<amat->nz; j++) { 1968 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1969 } 1970 v = bmat->a; jj = bmat->j; 1971 for (j=0; j<bmat->nz; j++) { 1972 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1973 } 1974 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1975 for (j=0; j<mat->cmap->N; j++) { 1976 if (tmp2[j] > *norm) *norm = tmp2[j]; 1977 } 1978 ierr = PetscFree(tmp);CHKERRQ(ierr); 1979 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1980 } else if (type == NORM_INFINITY) { /* max row norm */ 1981 PetscReal ntemp = 0.0; 1982 for (j=0; j<aij->A->rmap->n; j++) { 1983 v = amat->a + amat->i[j]; 1984 sum = 0.0; 1985 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1986 sum += PetscAbsScalar(*v); v++; 1987 } 1988 v = bmat->a + bmat->i[j]; 1989 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1990 sum += PetscAbsScalar(*v); v++; 1991 } 1992 if (sum > ntemp) ntemp = sum; 1993 } 1994 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1995 } else { 1996 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1997 } 1998 } 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatTranspose_MPIAIJ" 2004 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2005 { 2006 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2007 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2008 PetscErrorCode ierr; 2009 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2010 PetscInt cstart=A->cmap->rstart,ncol; 2011 Mat B; 2012 MatScalar *array; 2013 2014 PetscFunctionBegin; 2015 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2016 2017 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2018 ai = Aloc->i; aj = Aloc->j; 2019 bi = Bloc->i; bj = Bloc->j; 2020 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2021 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2022 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2023 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2024 for (i=0; i<ai[ma]; i++){ 2025 d_nnz[aj[i]] ++; 2026 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2027 } 2028 2029 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2030 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2031 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2032 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2033 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2034 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2035 } else { 2036 B = *matout; 2037 } 2038 2039 /* copy over the A part */ 2040 array = Aloc->a; 2041 row = A->rmap->rstart; 2042 for (i=0; i<ma; i++) { 2043 ncol = ai[i+1]-ai[i]; 2044 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2045 row++; array += ncol; aj += ncol; 2046 } 2047 aj = Aloc->j; 2048 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2049 2050 /* copy over the B part */ 2051 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2052 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2053 array = Bloc->a; 2054 row = A->rmap->rstart; 2055 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2056 cols_tmp = cols; 2057 for (i=0; i<mb; i++) { 2058 ncol = bi[i+1]-bi[i]; 2059 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2060 row++; array += ncol; cols_tmp += ncol; 2061 } 2062 ierr = PetscFree(cols);CHKERRQ(ierr); 2063 2064 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2065 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2066 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2067 *matout = B; 2068 } else { 2069 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2076 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2077 { 2078 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2079 Mat a = aij->A,b = aij->B; 2080 PetscErrorCode ierr; 2081 PetscInt s1,s2,s3; 2082 2083 PetscFunctionBegin; 2084 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2085 if (rr) { 2086 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2087 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2088 /* Overlap communication with computation. 
*/ 2089 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2090 } 2091 if (ll) { 2092 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2093 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2094 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2095 } 2096 /* scale the diagonal block */ 2097 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2098 2099 if (rr) { 2100 /* Do a scatter end and then right scale the off-diagonal block */ 2101 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2102 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2103 } 2104 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2110 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2111 { 2112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2113 PetscErrorCode ierr; 2114 2115 PetscFunctionBegin; 2116 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2117 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2118 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2119 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2124 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2125 { 2126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatEqual_MPIAIJ" 2136 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2137 { 2138 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2139 Mat a,b,c,d; 2140 PetscBool flg; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 a = matA->A; b = matA->B; 2145 c = matB->A; d = matB->B; 2146 2147 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2148 if (flg) { 2149 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2150 } 2151 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatCopy_MPIAIJ" 2157 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2158 { 2159 PetscErrorCode ierr; 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2161 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2162 2163 PetscFunctionBegin; 2164 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2165 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2166 /* because of the column compression in the off-processor part of the matrix a->B, 2167 the number of columns in a->B and b->B may be different, hence we cannot call 2168 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2169 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2170 then copying the submatrices */ 2171 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2172 } else { 2173 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2174 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2175 } 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatSetUp_MPIAIJ" 2181 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2182 { 2183 PetscErrorCode ierr; 2184 2185 PetscFunctionBegin; 2186 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2187 PetscFunctionReturn(0); 2188 } 2189 2190 #undef __FUNCT__ 2191 #define __FUNCT__ "MatAXPY_MPIAIJ" 2192 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2193 { 2194 PetscErrorCode ierr; 2195 PetscInt i; 2196 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2197 PetscBLASInt bnz,one=1; 2198 Mat_SeqAIJ *x,*y; 2199 2200 PetscFunctionBegin; 2201 if (str == SAME_NONZERO_PATTERN) { 2202 PetscScalar alpha = a; 2203 x = (Mat_SeqAIJ *)xx->A->data; 2204 y = (Mat_SeqAIJ *)yy->A->data; 2205 bnz = PetscBLASIntCast(x->nz); 2206 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2207 x = (Mat_SeqAIJ *)xx->B->data; 2208 y = (Mat_SeqAIJ *)yy->B->data; 2209 bnz = PetscBLASIntCast(x->nz); 2210 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2211 } else if (str == SUBSET_NONZERO_PATTERN) { 2212 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2213 2214 x = (Mat_SeqAIJ *)xx->B->data; 2215 y = (Mat_SeqAIJ *)yy->B->data; 2216 if (y->xtoy && y->XtoY != xx->B) { 2217 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2218 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2219 } 2220 if (!y->xtoy) { /* get xtoy */ 2221 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2222 y->XtoY = xx->B; 2223 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2224 } 2225 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2226 } else { 2227 Mat B; 2228 PetscInt *nnz_d,*nnz_o; 2229 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2230 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2231 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2232 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2233 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2234 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2235 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2236 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2237 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2238 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2239 ierr = MatHeaderReplace(Y,B); 2240 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2241 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2242 } 2243 PetscFunctionReturn(0); 2244 } 2245 2246 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2247 2248 #undef __FUNCT__ 2249 #define __FUNCT__ "MatConjugate_MPIAIJ" 2250 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2251 { 2252 #if defined(PETSC_USE_COMPLEX) 2253 PetscErrorCode ierr; 2254 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2255 2256 PetscFunctionBegin; 2257 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2258 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2259 #else 2260 PetscFunctionBegin; 2261 #endif 2262 PetscFunctionReturn(0); 2263 } 2264 2265 #undef __FUNCT__ 2266 
#define __FUNCT__ "MatRealPart_MPIAIJ" 2267 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2268 { 2269 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2270 PetscErrorCode ierr; 2271 2272 PetscFunctionBegin; 2273 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2274 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2275 PetscFunctionReturn(0); 2276 } 2277 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2280 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2281 { 2282 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2283 PetscErrorCode ierr; 2284 2285 PetscFunctionBegin; 2286 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2287 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #ifdef PETSC_HAVE_PBGL 2292 2293 #include <boost/parallel/mpi/bsp_process_group.hpp> 2294 #include <boost/graph/distributed/ilu_default_graph.hpp> 2295 #include <boost/graph/distributed/ilu_0_block.hpp> 2296 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2297 #include <boost/graph/distributed/petsc/interface.hpp> 2298 #include <boost/multi_array.hpp> 2299 #include <boost/parallel/distributed_property_map.hpp> 2300 2301 #undef __FUNCT__ 2302 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2303 /* 2304 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2305 */ 2306 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2307 { 2308 namespace petsc = boost::distributed::petsc; 2309 2310 namespace graph_dist = boost::graph::distributed; 2311 using boost::graph::distributed::ilu_default::process_group_type; 2312 using boost::graph::ilu_permuted; 2313 2314 PetscBool row_identity, col_identity; 2315 PetscContainer c; 2316 PetscInt m, n, M, N; 2317 PetscErrorCode ierr; 2318 2319 PetscFunctionBegin; 2320 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2321 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2322 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2323 if (!row_identity || !col_identity) { 2324 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2325 } 2326 2327 process_group_type pg; 2328 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2329 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2330 lgraph_type& level_graph = *lgraph_p; 2331 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2332 2333 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2334 ilu_permuted(level_graph); 2335 2336 /* put together the new matrix */ 2337 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2338 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2339 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2340 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2341 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2342 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2343 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2344 2345 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 2346 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2347 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2348 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2349 PetscFunctionReturn(0); 2350 } 2351 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2354 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2355
{ 2356 PetscFunctionBegin; 2357 PetscFunctionReturn(0); 2358 } 2359 2360 #undef __FUNCT__ 2361 #define __FUNCT__ "MatSolve_MPIAIJ" 2362 /* 2363 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2364 */ 2365 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2366 { 2367 namespace graph_dist = boost::graph::distributed; 2368 2369 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2370 lgraph_type* lgraph_p; 2371 PetscContainer c; 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2376 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2377 ierr = VecCopy(b, x);CHKERRQ(ierr); 2378 2379 PetscScalar* array_x; 2380 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2381 PetscInt sx; 2382 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2383 2384 PetscScalar* array_b; 2385 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2386 PetscInt sb; 2387 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2388 2389 lgraph_type& level_graph = *lgraph_p; 2390 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2391 2392 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2393 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2394 ref_x(array_x, boost::extents[num_vertices(graph)]); 2395 2396 typedef boost::iterator_property_map<array_ref_type::iterator, 2397 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2398 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2399 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2400 2401 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2402 2403 PetscFunctionReturn(0); 2404 } 2405 #endif 2406 2407 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2408 PetscInt nzlocal,nsends,nrecvs; 2409 PetscMPIInt *send_rank,*recv_rank; 2410 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2411 PetscScalar *sbuf_a,**rbuf_a; 2412 PetscErrorCode (*Destroy)(Mat); 2413 } Mat_Redundant; 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2417 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2418 { 2419 PetscErrorCode ierr; 2420 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2421 PetscInt i; 2422 2423 PetscFunctionBegin; 2424 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2425 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2426 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2427 for (i=0; i<redund->nrecvs; i++){ 2428 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2429 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2430 } 2431 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2432 ierr = PetscFree(redund);CHKERRQ(ierr); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "MatDestroy_MatRedundant" 2438 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2439 { 2440 PetscErrorCode ierr; 2441 PetscContainer container; 2442 Mat_Redundant *redund=PETSC_NULL; 2443 2444 PetscFunctionBegin; 2445 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2446 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2447 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2448 A->ops->destroy = redund->Destroy; 2449 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 
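  /* The original destroy routine saved in redund->Destroy has just been restored into A->ops->destroy,
     and composing a null object under "Mat_Redundant" drops the matrix's reference to the container;
     once that reference count reaches zero, PetscContainerDestroy_MatRedundant() frees the cached
     communication buffers. The restored destroy routine is invoked below to finish tearing down A. */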
2450 if (A->ops->destroy) { 2451 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2452 } 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2458 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2459 { 2460 PetscMPIInt rank,size; 2461 MPI_Comm comm=((PetscObject)mat)->comm; 2462 PetscErrorCode ierr; 2463 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2464 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2465 PetscInt *rowrange=mat->rmap->range; 2466 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2467 Mat A=aij->A,B=aij->B,C=*matredundant; 2468 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2469 PetscScalar *sbuf_a; 2470 PetscInt nzlocal=a->nz+b->nz; 2471 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2472 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2473 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2474 MatScalar *aworkA,*aworkB; 2475 PetscScalar *vals; 2476 PetscMPIInt tag1,tag2,tag3,imdex; 2477 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2478 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2479 MPI_Status recv_status,*send_status; 2480 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2481 PetscInt **rbuf_j=PETSC_NULL; 2482 PetscScalar **rbuf_a=PETSC_NULL; 2483 Mat_Redundant *redund=PETSC_NULL; 2484 PetscContainer container; 2485 2486 PetscFunctionBegin; 2487 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2488 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2489 2490 if (reuse == MAT_REUSE_MATRIX) { 2491 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2492 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2493 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2494 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2495 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2496 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2497 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2498 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2499 2500 nsends = redund->nsends; 2501 nrecvs = redund->nrecvs; 2502 send_rank = redund->send_rank; 2503 recv_rank = redund->recv_rank; 2504 sbuf_nz = redund->sbuf_nz; 2505 rbuf_nz = redund->rbuf_nz; 2506 sbuf_j = redund->sbuf_j; 2507 sbuf_a = redund->sbuf_a; 2508 rbuf_j = redund->rbuf_j; 2509 rbuf_a = redund->rbuf_a; 2510 } 2511 2512 if (reuse == MAT_INITIAL_MATRIX){ 2513 PetscMPIInt subrank,subsize; 2514 PetscInt nleftover,np_subcomm; 2515 /* get the destination processors' id send_rank, nsends and nrecvs */ 2516 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2517 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2518 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2519 np_subcomm = size/nsubcomm; 2520 nleftover = size - nsubcomm*np_subcomm; 2521 nsends = 0; nrecvs = 0; 2522 for (i=0; i<size; i++){ /* i=rank*/ 2523 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2524 send_rank[nsends] = i; nsends++; 2525 recv_rank[nrecvs++] = i; 2526 } 2527 } 2528 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2529 i = size-nleftover-1; 2530 j = 0; 2531 while (j < nsubcomm - nleftover){ 2532 send_rank[nsends++] = i; 2533 i--; j++; 2534 } 2535 } 2536 2537 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2538 for (i=0; i<nleftover; i++){ 2539 recv_rank[nrecvs++] = size-nleftover+i; 2540 } 2541 } 2542 2543 /* allocate sbuf_j, sbuf_a */ 2544 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2545 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2546 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2547 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2548 2549 /* copy mat's local entries into the buffers */ 2550 if (reuse == MAT_INITIAL_MATRIX){ 2551 rownz_max = 0; 2552 rptr = sbuf_j; 2553 cols = sbuf_j + rend-rstart + 1; 2554 vals = sbuf_a; 2555 rptr[0] = 0; 2556 for (i=0; i<rend-rstart; i++){ 2557 row = i + rstart; 2558 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2559 ncols = nzA + nzB; 2560 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2561 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2562 /* load the column indices for this row into cols */ 2563 lwrite = 0; 2564 for (l=0; l<nzB; l++) { 2565 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2566 vals[lwrite] = aworkB[l]; 2567 cols[lwrite++] = ctmp; 2568 } 2569 } 2570 for (l=0; l<nzA; l++){ 2571 vals[lwrite] = aworkA[l]; 2572 cols[lwrite++] = cstart + cworkA[l]; 2573 } 2574 for (l=0; l<nzB; l++) { 2575 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2576 vals[lwrite] = aworkB[l]; 2577 cols[lwrite++] = ctmp; 2578 } 2579 } 2580 vals += ncols; 2581 cols += ncols; 2582 rptr[i+1] = rptr[i] + ncols; 2583 if (rownz_max < ncols) rownz_max = ncols; 2584 } 2585 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2586 } else { /* only copy matrix values into sbuf_a */ 2587 rptr = sbuf_j; 2588 vals = sbuf_a; 2589 rptr[0] = 0; 2590 for (i=0; i<rend-rstart; i++){ 2591 row = i + rstart; 2592 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2593 ncols = nzA + nzB; 2594 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2595 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2596 lwrite = 0; 2597 for (l=0; l<nzB; l++) { 2598 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2599 } 2600 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2601 for 
(l=0; l<nzB; l++) { 2602 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2603 } 2604 vals += ncols; 2605 rptr[i+1] = rptr[i] + ncols; 2606 } 2607 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2608 2609 /* send nzlocal to others, and recv other's nzlocal */ 2610 /*--------------------------------------------------*/ 2611 if (reuse == MAT_INITIAL_MATRIX){ 2612 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2613 s_waits2 = s_waits3 + nsends; 2614 s_waits1 = s_waits2 + nsends; 2615 r_waits1 = s_waits1 + nsends; 2616 r_waits2 = r_waits1 + nrecvs; 2617 r_waits3 = r_waits2 + nrecvs; 2618 } else { 2619 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2620 r_waits3 = s_waits3 + nsends; 2621 } 2622 2623 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2624 if (reuse == MAT_INITIAL_MATRIX){ 2625 /* get new tags to keep the communication clean */ 2626 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2627 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2628 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2629 2630 /* post receives of other's nzlocal */ 2631 for (i=0; i<nrecvs; i++){ 2632 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2633 } 2634 /* send nzlocal to others */ 2635 for (i=0; i<nsends; i++){ 2636 sbuf_nz[i] = nzlocal; 2637 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2638 } 2639 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2640 count = nrecvs; 2641 while (count) { 2642 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2643 recv_rank[imdex] = recv_status.MPI_SOURCE; 2644 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2645 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2646 2647 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2648 rbuf_nz[imdex] += i + 2; 2649 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2650 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2651 count--; 2652 } 2653 /* wait on sends of nzlocal */ 2654 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2655 /* send mat->i,j to others, and recv from other's */ 2656 /*------------------------------------------------*/ 2657 for (i=0; i<nsends; i++){ 2658 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2659 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2660 } 2661 /* wait on receives of mat->i,j */ 2662 /*------------------------------*/ 2663 count = nrecvs; 2664 while (count) { 2665 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2666 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2667 count--; 2668 } 2669 /* wait on sends of mat->i,j */ 2670 /*---------------------------*/ 2671 if (nsends) { 2672 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2673 } 2674 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2675 2676 /* post receives, send and receive mat->a */ 2677 
/*----------------------------------------*/ 2678 for (imdex=0; imdex<nrecvs; imdex++) { 2679 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2680 } 2681 for (i=0; i<nsends; i++){ 2682 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2683 } 2684 count = nrecvs; 2685 while (count) { 2686 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2687 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2688 count--; 2689 } 2690 if (nsends) { 2691 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2692 } 2693 2694 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2695 2696 /* create redundant matrix */ 2697 /*-------------------------*/ 2698 if (reuse == MAT_INITIAL_MATRIX){ 2699 /* compute rownz_max for preallocation */ 2700 for (imdex=0; imdex<nrecvs; imdex++){ 2701 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2702 rptr = rbuf_j[imdex]; 2703 for (i=0; i<j; i++){ 2704 ncols = rptr[i+1] - rptr[i]; 2705 if (rownz_max < ncols) rownz_max = ncols; 2706 } 2707 } 2708 2709 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2710 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2711 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2712 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2713 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2714 } else { 2715 C = *matredundant; 2716 } 2717 2718 /* insert local matrix entries */ 2719 rptr = sbuf_j; 2720 cols = sbuf_j + rend-rstart + 1; 2721 vals = sbuf_a; 2722 for (i=0; i<rend-rstart; i++){ 2723 row = i + rstart; 2724 ncols = rptr[i+1] - rptr[i]; 2725 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2726 vals += ncols; 2727 cols += ncols; 2728 } 2729 /* insert received matrix entries */ 2730 for (imdex=0; imdex<nrecvs; imdex++){ 2731 rstart = rowrange[recv_rank[imdex]]; 2732 rend = rowrange[recv_rank[imdex]+1]; 2733 rptr = rbuf_j[imdex]; 2734 cols = rbuf_j[imdex] + rend-rstart + 1; 2735 vals = rbuf_a[imdex]; 2736 for (i=0; i<rend-rstart; i++){ 2737 row = i + rstart; 2738 ncols = rptr[i+1] - rptr[i]; 2739 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2740 vals += ncols; 2741 cols += ncols; 2742 } 2743 } 2744 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2745 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2746 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2747 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2748 if (reuse == MAT_INITIAL_MATRIX) { 2749 PetscContainer container; 2750 *matredundant = C; 2751 /* create a supporting struct and attach it to C for reuse */ 2752 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2753 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2754 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2755 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2756 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2757 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2758 2759 redund->nzlocal = nzlocal; 2760 redund->nsends = nsends; 2761 redund->nrecvs = nrecvs; 2762 
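      /* The fields cached below record the send/receive pattern and the index/value buffers; a later
         call with MAT_REUSE_MATRIX retrieves them from the "Mat_Redundant" container (see the top of
         this routine) so that only the numerical values need to be communicated again. */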
redund->send_rank = send_rank; 2763 redund->recv_rank = recv_rank; 2764 redund->sbuf_nz = sbuf_nz; 2765 redund->rbuf_nz = rbuf_nz; 2766 redund->sbuf_j = sbuf_j; 2767 redund->sbuf_a = sbuf_a; 2768 redund->rbuf_j = rbuf_j; 2769 redund->rbuf_a = rbuf_a; 2770 2771 redund->Destroy = C->ops->destroy; 2772 C->ops->destroy = MatDestroy_MatRedundant; 2773 } 2774 PetscFunctionReturn(0); 2775 } 2776 2777 #undef __FUNCT__ 2778 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2779 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2780 { 2781 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2782 PetscErrorCode ierr; 2783 PetscInt i,*idxb = 0; 2784 PetscScalar *va,*vb; 2785 Vec vtmp; 2786 2787 PetscFunctionBegin; 2788 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2789 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2790 if (idx) { 2791 for (i=0; i<A->rmap->n; i++) { 2792 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2793 } 2794 } 2795 2796 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2797 if (idx) { 2798 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2799 } 2800 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2801 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2802 2803 for (i=0; i<A->rmap->n; i++){ 2804 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2805 va[i] = vb[i]; 2806 if (idx) idx[i] = a->garray[idxb[i]]; 2807 } 2808 } 2809 2810 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2811 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2812 ierr = PetscFree(idxb);CHKERRQ(ierr); 2813 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2814 PetscFunctionReturn(0); 2815 } 2816 2817 #undef __FUNCT__ 2818 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2819 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2820 { 2821 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2822 PetscErrorCode ierr; 2823 PetscInt i,*idxb = 0; 2824 PetscScalar *va,*vb; 2825 Vec vtmp; 2826 2827 PetscFunctionBegin; 2828 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2829 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2830 if (idx) { 2831 for (i=0; i<A->rmap->n; i++) { 2832 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2833 } 2834 } 2835 2836 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2837 if (idx) { 2838 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2839 } 2840 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2841 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2842 2843 for (i=0; i<A->rmap->n; i++){ 2844 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2845 va[i] = vb[i]; 2846 if (idx) idx[i] = a->garray[idxb[i]]; 2847 } 2848 } 2849 2850 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2851 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2852 ierr = PetscFree(idxb);CHKERRQ(ierr); 2853 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2854 PetscFunctionReturn(0); 2855 } 2856 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2859 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2860 { 2861 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2862 PetscInt n = A->rmap->n; 2863 PetscInt cstart = A->cmap->rstart; 2864 PetscInt *cmap = mat->garray; 2865 PetscInt *diagIdx, *offdiagIdx; 2866 Vec diagV, offdiagV; 2867 PetscScalar *a, *diagA, *offdiagA; 2868 PetscInt r; 2869 PetscErrorCode ierr; 2870 2871 PetscFunctionBegin; 2872 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2873 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2874 ierr = VecCreateSeq(((PetscObject)A)->comm, n,
&offdiagV);CHKERRQ(ierr); 2875 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2876 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2877 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2878 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2879 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2880 for(r = 0; r < n; ++r) { 2881 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2882 a[r] = diagA[r]; 2883 idx[r] = cstart + diagIdx[r]; 2884 } else { 2885 a[r] = offdiagA[r]; 2886 idx[r] = cmap[offdiagIdx[r]]; 2887 } 2888 } 2889 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2890 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2891 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2892 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2893 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2894 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2895 PetscFunctionReturn(0); 2896 } 2897 2898 #undef __FUNCT__ 2899 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2900 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2901 { 2902 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2903 PetscInt n = A->rmap->n; 2904 PetscInt cstart = A->cmap->rstart; 2905 PetscInt *cmap = mat->garray; 2906 PetscInt *diagIdx, *offdiagIdx; 2907 Vec diagV, offdiagV; 2908 PetscScalar *a, *diagA, *offdiagA; 2909 PetscInt r; 2910 PetscErrorCode ierr; 2911 2912 PetscFunctionBegin; 2913 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2914 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2915 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2916 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2917 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2918 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2919 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2920 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2921 for(r = 0; r < n; ++r) { 2922 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2923 a[r] = diagA[r]; 2924 idx[r] = cstart + diagIdx[r]; 2925 } else { 2926 a[r] = offdiagA[r]; 2927 idx[r] = cmap[offdiagIdx[r]]; 2928 } 2929 } 2930 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2931 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2932 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2933 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2934 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2935 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2936 PetscFunctionReturn(0); 2937 } 2938 2939 #undef __FUNCT__ 2940 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2941 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2942 { 2943 PetscErrorCode ierr; 2944 Mat *dummy; 2945 2946 PetscFunctionBegin; 2947 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2948 *newmat = *dummy; 2949 ierr = PetscFree(dummy);CHKERRQ(ierr); 2950 PetscFunctionReturn(0); 2951 } 2952 2953 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2954 2955 #undef __FUNCT__ 2956 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2957 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2958 { 2959 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2960 PetscErrorCode ierr; 2961 2962 PetscFunctionBegin; 2963 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2964 PetscFunctionReturn(0); 2965 } 2966 2967 2968 /* -------------------------------------------------------------------*/ 2969 static 
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2970 MatGetRow_MPIAIJ, 2971 MatRestoreRow_MPIAIJ, 2972 MatMult_MPIAIJ, 2973 /* 4*/ MatMultAdd_MPIAIJ, 2974 MatMultTranspose_MPIAIJ, 2975 MatMultTransposeAdd_MPIAIJ, 2976 #ifdef PETSC_HAVE_PBGL 2977 MatSolve_MPIAIJ, 2978 #else 2979 0, 2980 #endif 2981 0, 2982 0, 2983 /*10*/ 0, 2984 0, 2985 0, 2986 MatSOR_MPIAIJ, 2987 MatTranspose_MPIAIJ, 2988 /*15*/ MatGetInfo_MPIAIJ, 2989 MatEqual_MPIAIJ, 2990 MatGetDiagonal_MPIAIJ, 2991 MatDiagonalScale_MPIAIJ, 2992 MatNorm_MPIAIJ, 2993 /*20*/ MatAssemblyBegin_MPIAIJ, 2994 MatAssemblyEnd_MPIAIJ, 2995 MatSetOption_MPIAIJ, 2996 MatZeroEntries_MPIAIJ, 2997 /*24*/ MatZeroRows_MPIAIJ, 2998 0, 2999 #ifdef PETSC_HAVE_PBGL 3000 0, 3001 #else 3002 0, 3003 #endif 3004 0, 3005 0, 3006 /*29*/ MatSetUp_MPIAIJ, 3007 #ifdef PETSC_HAVE_PBGL 3008 0, 3009 #else 3010 0, 3011 #endif 3012 0, 3013 0, 3014 0, 3015 /*34*/ MatDuplicate_MPIAIJ, 3016 0, 3017 0, 3018 0, 3019 0, 3020 /*39*/ MatAXPY_MPIAIJ, 3021 MatGetSubMatrices_MPIAIJ, 3022 MatIncreaseOverlap_MPIAIJ, 3023 MatGetValues_MPIAIJ, 3024 MatCopy_MPIAIJ, 3025 /*44*/ MatGetRowMax_MPIAIJ, 3026 MatScale_MPIAIJ, 3027 0, 3028 0, 3029 MatZeroRowsColumns_MPIAIJ, 3030 /*49*/ MatSetBlockSize_MPIAIJ, 3031 0, 3032 0, 3033 0, 3034 0, 3035 /*54*/ MatFDColoringCreate_MPIAIJ, 3036 0, 3037 MatSetUnfactored_MPIAIJ, 3038 MatPermute_MPIAIJ, 3039 0, 3040 /*59*/ MatGetSubMatrix_MPIAIJ, 3041 MatDestroy_MPIAIJ, 3042 MatView_MPIAIJ, 3043 0, 3044 0, 3045 /*64*/ 0, 3046 0, 3047 0, 3048 0, 3049 0, 3050 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3051 MatGetRowMinAbs_MPIAIJ, 3052 0, 3053 MatSetColoring_MPIAIJ, 3054 #if defined(PETSC_HAVE_ADIC) 3055 MatSetValuesAdic_MPIAIJ, 3056 #else 3057 0, 3058 #endif 3059 MatSetValuesAdifor_MPIAIJ, 3060 /*75*/ MatFDColoringApply_AIJ, 3061 0, 3062 0, 3063 0, 3064 0, 3065 /*80*/ 0, 3066 0, 3067 0, 3068 /*83*/ MatLoad_MPIAIJ, 3069 0, 3070 0, 3071 0, 3072 0, 3073 0, 3074 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3075 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3076 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3077 MatPtAP_Basic, 3078 MatPtAPSymbolic_MPIAIJ, 3079 /*94*/ MatPtAPNumeric_MPIAIJ, 3080 0, 3081 0, 3082 0, 3083 0, 3084 /*99*/ 0, 3085 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3086 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3087 MatConjugate_MPIAIJ, 3088 0, 3089 /*104*/MatSetValuesRow_MPIAIJ, 3090 MatRealPart_MPIAIJ, 3091 MatImaginaryPart_MPIAIJ, 3092 0, 3093 0, 3094 /*109*/0, 3095 MatGetRedundantMatrix_MPIAIJ, 3096 MatGetRowMin_MPIAIJ, 3097 0, 3098 0, 3099 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3100 0, 3101 0, 3102 0, 3103 0, 3104 /*119*/0, 3105 0, 3106 0, 3107 0, 3108 MatGetMultiProcBlock_MPIAIJ, 3109 /*124*/MatFindNonZeroRows_MPIAIJ, 3110 MatGetColumnNorms_MPIAIJ, 3111 MatInvertBlockDiagonal_MPIAIJ, 3112 0, 3113 MatGetSubMatricesParallel_MPIAIJ, 3114 /*129*/0, 3115 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3116 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3117 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3118 0, 3119 /*134*/0, 3120 0, 3121 0, 3122 0, 3123 0 3124 }; 3125 3126 /* ----------------------------------------------------------------------------------------*/ 3127 3128 EXTERN_C_BEGIN 3129 #undef __FUNCT__ 3130 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3131 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3132 { 3133 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3134 PetscErrorCode ierr; 3135 3136 PetscFunctionBegin; 3137 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3138 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3139 PetscFunctionReturn(0); 3140 } 3141 EXTERN_C_END 3142 3143 EXTERN_C_BEGIN 3144 #undef __FUNCT__ 3145 #define 
__FUNCT__ "MatRetrieveValues_MPIAIJ" 3146 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3147 { 3148 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3149 PetscErrorCode ierr; 3150 3151 PetscFunctionBegin; 3152 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3153 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3154 PetscFunctionReturn(0); 3155 } 3156 EXTERN_C_END 3157 3158 EXTERN_C_BEGIN 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3161 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3162 { 3163 Mat_MPIAIJ *b; 3164 PetscErrorCode ierr; 3165 PetscInt i; 3166 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3167 3168 PetscFunctionBegin; 3169 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3170 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3171 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3172 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3173 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3174 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3175 3176 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3177 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3178 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3179 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3180 if (d_nnz) { 3181 for (i=0; i<B->rmap->n; i++) { 3182 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3183 } 3184 } 3185 if (o_nnz) { 3186 for (i=0; i<B->rmap->n; i++) { 3187 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3188 } 3189 } 3190 b = (Mat_MPIAIJ*)B->data; 3191 3192 if (!B->preallocated) { 3193 /* Explicitly create 2 MATSEQAIJ matrices. */ 3194 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3195 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3196 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3197 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3198 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3199 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3200 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3201 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3202 } 3203 3204 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3205 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3206 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3207 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3208 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3209 B->preallocated = PETSC_TRUE; 3210 PetscFunctionReturn(0); 3211 } 3212 EXTERN_C_END 3213 3214 #undef __FUNCT__ 3215 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3216 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3217 { 3218 Mat mat; 3219 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3220 PetscErrorCode ierr; 3221 3222 PetscFunctionBegin; 3223 *newmat = 0; 3224 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3225 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3226 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3227 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3228 a = (Mat_MPIAIJ*)mat->data; 3229 3230 mat->factortype = matin->factortype; 3231 mat->rmap->bs = matin->rmap->bs; 3232 mat->assembled = PETSC_TRUE; 3233 mat->insertmode = NOT_SET_VALUES; 3234 mat->preallocated = PETSC_TRUE; 3235 3236 a->size = oldmat->size; 3237 a->rank = oldmat->rank; 3238 a->donotstash = oldmat->donotstash; 3239 a->roworiented = oldmat->roworiented; 3240 a->rowindices = 0; 3241 a->rowvalues = 0; 3242 a->getrowactive = PETSC_FALSE; 3243 3244 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3245 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3246 3247 if (oldmat->colmap) { 3248 #if defined (PETSC_USE_CTABLE) 3249 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3250 #else 3251 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3252 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3253 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3254 #endif 3255 } else a->colmap = 0; 3256 if (oldmat->garray) { 3257 PetscInt len; 3258 len = oldmat->B->cmap->n; 3259 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3260 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3261 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3262 } else a->garray = 0; 3263 3264 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3265 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3266 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3267 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3268 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3269 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3270 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3271 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3272 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3273 *newmat = mat; 3274 PetscFunctionReturn(0); 3275 } 3276 3277 3278 3279 #undef __FUNCT__ 3280 #define __FUNCT__ "MatLoad_MPIAIJ" 3281 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3282 { 3283 PetscScalar *vals,*svals; 3284 MPI_Comm comm = ((PetscObject)viewer)->comm; 3285 PetscErrorCode ierr; 3286 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3287 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3288 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3289 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = 
PETSC_NULL,jj,*mycols,*smycols; 3290 PetscInt cend,cstart,n,*rowners,sizesset=1; 3291 int fd; 3292 3293 PetscFunctionBegin; 3294 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3295 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3296 if (!rank) { 3297 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3298 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3299 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3300 } 3301 3302 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3303 3304 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3305 M = header[1]; N = header[2]; 3306 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3307 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3308 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3309 3310 /* If global sizes are set, check if they are consistent with that given in the file */ 3311 if (sizesset) { 3312 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3313 } 3314 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3315 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3316 3317 /* determine ownership of all rows */ 3318 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3319 else m = newMat->rmap->n; /* Set by user */ 3320 3321 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3322 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3323 3324 /* First process needs enough room for process with most rows */ 3325 if (!rank) { 3326 mmax = rowners[1]; 3327 for (i=2; i<size; i++) { 3328 mmax = PetscMax(mmax,rowners[i]); 3329 } 3330 } else mmax = m; 3331 3332 rowners[0] = 0; 3333 for (i=2; i<=size; i++) { 3334 rowners[i] += rowners[i-1]; 3335 } 3336 rstart = rowners[rank]; 3337 rend = rowners[rank+1]; 3338 3339 /* distribute row lengths to all processors */ 3340 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3341 if (!rank) { 3342 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3343 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3344 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3345 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3346 for (j=0; j<m; j++) { 3347 procsnz[0] += ourlens[j]; 3348 } 3349 for (i=1; i<size; i++) { 3350 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3351 /* calculate the number of nonzeros on each processor */ 3352 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3353 procsnz[i] += rowlengths[j]; 3354 } 3355 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3356 } 3357 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3358 } else { 3359 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3360 } 3361 3362 if (!rank) { 3363 /* determine max buffer needed and allocate it */ 3364 maxnz = 0; 3365 for (i=0; i<size; i++) { 3366 maxnz = PetscMax(maxnz,procsnz[i]); 3367 } 3368 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3369 3370 /* read in my part of the matrix column indices */ 3371 
nz = procsnz[0]; 3372 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3373 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3374 3375 /* read in every one elses and ship off */ 3376 for (i=1; i<size; i++) { 3377 nz = procsnz[i]; 3378 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3379 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3380 } 3381 ierr = PetscFree(cols);CHKERRQ(ierr); 3382 } else { 3383 /* determine buffer space needed for message */ 3384 nz = 0; 3385 for (i=0; i<m; i++) { 3386 nz += ourlens[i]; 3387 } 3388 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3389 3390 /* receive message of column indices*/ 3391 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3392 } 3393 3394 /* determine column ownership if matrix is not square */ 3395 if (N != M) { 3396 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3397 else n = newMat->cmap->n; 3398 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3399 cstart = cend - n; 3400 } else { 3401 cstart = rstart; 3402 cend = rend; 3403 n = cend - cstart; 3404 } 3405 3406 /* loop over local rows, determining number of off diagonal entries */ 3407 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3408 jj = 0; 3409 for (i=0; i<m; i++) { 3410 for (j=0; j<ourlens[i]; j++) { 3411 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3412 jj++; 3413 } 3414 } 3415 3416 for (i=0; i<m; i++) { 3417 ourlens[i] -= offlens[i]; 3418 } 3419 if (!sizesset) { 3420 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3421 } 3422 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3423 3424 for (i=0; i<m; i++) { 3425 ourlens[i] += offlens[i]; 3426 } 3427 3428 if (!rank) { 3429 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3430 3431 /* read in my part of the matrix numerical values */ 3432 nz = procsnz[0]; 3433 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3434 3435 /* insert into matrix */ 3436 jj = rstart; 3437 smycols = mycols; 3438 svals = vals; 3439 for (i=0; i<m; i++) { 3440 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3441 smycols += ourlens[i]; 3442 svals += ourlens[i]; 3443 jj++; 3444 } 3445 3446 /* read in other processors and ship out */ 3447 for (i=1; i<size; i++) { 3448 nz = procsnz[i]; 3449 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3450 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3451 } 3452 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3453 } else { 3454 /* receive numeric values */ 3455 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3456 3457 /* receive message of values*/ 3458 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3459 3460 /* insert into matrix */ 3461 jj = rstart; 3462 smycols = mycols; 3463 svals = vals; 3464 for (i=0; i<m; i++) { 3465 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3466 smycols += ourlens[i]; 3467 svals += ourlens[i]; 3468 jj++; 3469 } 3470 } 3471 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3472 ierr = PetscFree(vals);CHKERRQ(ierr); 3473 ierr = PetscFree(mycols);CHKERRQ(ierr); 3474 ierr = PetscFree(rowners);CHKERRQ(ierr); 3475 3476 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3477 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3478 PetscFunctionReturn(0); 3479 } 3480 
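/*
   Usage sketch (not part of the library source): MatLoad_MPIAIJ() is reached through
   MatLoad() on a matrix whose type has been set to an AIJ type. A minimal calling
   sequence, assuming a PETSc binary file named "matrix.dat" (the file name is
   illustrative only and error checking is abbreviated), might look like:

      Mat         A;
      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/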
3481 #undef __FUNCT__ 3482 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3483 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3484 { 3485 PetscErrorCode ierr; 3486 IS iscol_local; 3487 PetscInt csize; 3488 3489 PetscFunctionBegin; 3490 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3491 if (call == MAT_REUSE_MATRIX) { 3492 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3493 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3494 } else { 3495 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3496 } 3497 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3498 if (call == MAT_INITIAL_MATRIX) { 3499 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3500 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3501 } 3502 PetscFunctionReturn(0); 3503 } 3504 3505 #undef __FUNCT__ 3506 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3507 /* 3508 Not great since it makes two copies of the submatrix, first an SeqAIJ 3509 in local and then by concatenating the local matrices the end result. 3510 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3511 3512 Note: This requires a sequential iscol with all indices. 3513 */ 3514 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3515 { 3516 PetscErrorCode ierr; 3517 PetscMPIInt rank,size; 3518 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3519 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3520 Mat *local,M,Mreuse; 3521 MatScalar *vwork,*aa; 3522 MPI_Comm comm = ((PetscObject)mat)->comm; 3523 Mat_SeqAIJ *aij; 3524 3525 3526 PetscFunctionBegin; 3527 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3528 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3529 3530 if (call == MAT_REUSE_MATRIX) { 3531 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3532 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3533 local = &Mreuse; 3534 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3535 } else { 3536 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3537 Mreuse = *local; 3538 ierr = PetscFree(local);CHKERRQ(ierr); 3539 } 3540 3541 /* 3542 m - number of local rows 3543 n - number of columns (same on all processors) 3544 rstart - first row in new global matrix generated 3545 */ 3546 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3547 if (call == MAT_INITIAL_MATRIX) { 3548 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3549 ii = aij->i; 3550 jj = aij->j; 3551 3552 /* 3553 Determine the number of non-zeros in the diagonal and off-diagonal 3554 portions of the matrix in order to do correct preallocation 3555 */ 3556 3557 /* first get start and end of "diagonal" columns */ 3558 if (csize == PETSC_DECIDE) { 3559 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3560 if (mglobal == n) { /* square matrix */ 3561 nlocal = m; 3562 } else { 3563 nlocal = n/size + ((n % size) > rank); 3564 } 3565 } else { 3566 nlocal = csize; 3567 } 3568 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3569 rstart = rend - nlocal; 3570 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to 
total number of columns %D",rend,n); 3571 3572 /* next, compute all the lengths */ 3573 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3574 olens = dlens + m; 3575 for (i=0; i<m; i++) { 3576 jend = ii[i+1] - ii[i]; 3577 olen = 0; 3578 dlen = 0; 3579 for (j=0; j<jend; j++) { 3580 if (*jj < rstart || *jj >= rend) olen++; 3581 else dlen++; 3582 jj++; 3583 } 3584 olens[i] = olen; 3585 dlens[i] = dlen; 3586 } 3587 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3588 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3589 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3590 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3591 ierr = PetscFree(dlens);CHKERRQ(ierr); 3592 } else { 3593 PetscInt ml,nl; 3594 3595 M = *newmat; 3596 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3597 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3598 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3599 /* 3600 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3601 rather than the slower MatSetValues(). 3602 */ 3603 M->was_assembled = PETSC_TRUE; 3604 M->assembled = PETSC_FALSE; 3605 } 3606 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3607 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3608 ii = aij->i; 3609 jj = aij->j; 3610 aa = aij->a; 3611 for (i=0; i<m; i++) { 3612 row = rstart + i; 3613 nz = ii[i+1] - ii[i]; 3614 cwork = jj; jj += nz; 3615 vwork = aa; aa += nz; 3616 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3617 } 3618 3619 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3620 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3621 *newmat = M; 3622 3623 /* save submatrix used in processor for next request */ 3624 if (call == MAT_INITIAL_MATRIX) { 3625 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3626 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3627 } 3628 3629 PetscFunctionReturn(0); 3630 } 3631 3632 EXTERN_C_BEGIN 3633 #undef __FUNCT__ 3634 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3635 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3636 { 3637 PetscInt m,cstart, cend,j,nnz,i,d; 3638 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3639 const PetscInt *JJ; 3640 PetscScalar *values; 3641 PetscErrorCode ierr; 3642 3643 PetscFunctionBegin; 3644 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3645 3646 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3647 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3648 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3649 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3650 m = B->rmap->n; 3651 cstart = B->cmap->rstart; 3652 cend = B->cmap->rend; 3653 rstart = B->rmap->rstart; 3654 3655 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3656 3657 #if defined(PETSC_USE_DEBUG) 3658 for (i=0; i<m; i++) { 3659 nnz = Ii[i+1]- Ii[i]; 3660 JJ = J + Ii[i]; 3661 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3662 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3663 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3664 } 3665
#endif 3666 3667 for (i=0; i<m; i++) { 3668 nnz = Ii[i+1]- Ii[i]; 3669 JJ = J + Ii[i]; 3670 nnz_max = PetscMax(nnz_max,nnz); 3671 d = 0; 3672 for (j=0; j<nnz; j++) { 3673 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3674 } 3675 d_nnz[i] = d; 3676 o_nnz[i] = nnz - d; 3677 } 3678 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3679 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3680 3681 if (v) values = (PetscScalar*)v; 3682 else { 3683 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3684 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3685 } 3686 3687 for (i=0; i<m; i++) { 3688 ii = i + rstart; 3689 nnz = Ii[i+1]- Ii[i]; 3690 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3691 } 3692 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3693 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3694 3695 if (!v) { 3696 ierr = PetscFree(values);CHKERRQ(ierr); 3697 } 3698 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3699 PetscFunctionReturn(0); 3700 } 3701 EXTERN_C_END 3702 3703 #undef __FUNCT__ 3704 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3705 /*@ 3706 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3707 (the default parallel PETSc format). 3708 3709 Collective on MPI_Comm 3710 3711 Input Parameters: 3712 + B - the matrix 3713 . i - the indices into j for the start of each local row (starts with zero) 3714 . j - the column indices for each local row (starts with zero) 3715 - v - optional values in the matrix 3716 3717 Level: developer 3718 3719 Notes: 3720 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3721 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3722 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3723 3724 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3725 3726 The format which is used for the sparse matrix input, is equivalent to a 3727 row-major ordering.. i.e for the following matrix, the input data expected is 3728 as shown: 3729 3730 1 0 0 3731 2 0 3 P0 3732 ------- 3733 4 5 6 P1 3734 3735 Process0 [P0]: rows_owned=[0,1] 3736 i = {0,1,3} [size = nrow+1 = 2+1] 3737 j = {0,0,2} [size = nz = 6] 3738 v = {1,2,3} [size = nz = 6] 3739 3740 Process1 [P1]: rows_owned=[2] 3741 i = {0,3} [size = nrow+1 = 1+1] 3742 j = {0,1,2} [size = nz = 6] 3743 v = {4,5,6} [size = nz = 6] 3744 3745 .keywords: matrix, aij, compressed row, sparse, parallel 3746 3747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3748 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3749 @*/ 3750 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3751 { 3752 PetscErrorCode ierr; 3753 3754 PetscFunctionBegin; 3755 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3756 PetscFunctionReturn(0); 3757 } 3758 3759 #undef __FUNCT__ 3760 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3761 /*@C 3762 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3763 (the default parallel PETSc format). 
For good matrix assembly performance 3764 the user should preallocate the matrix storage by setting the parameters 3765 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3766 performance can be increased by more than a factor of 50. 3767 3768 Collective on MPI_Comm 3769 3770 Input Parameters: 3771 + A - the matrix 3772 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3773 (same value is used for all local rows) 3774 . d_nnz - array containing the number of nonzeros in the various rows of the 3775 DIAGONAL portion of the local submatrix (possibly different for each row) 3776 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3777 The size of this array is equal to the number of local rows, i.e 'm'. 3778 For matrices that will be factored, you must leave room for (and set) 3779 the diagonal entry even if it is zero. 3780 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3781 submatrix (same value is used for all local rows). 3782 - o_nnz - array containing the number of nonzeros in the various rows of the 3783 OFF-DIAGONAL portion of the local submatrix (possibly different for 3784 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3785 structure. The size of this array is equal to the number 3786 of local rows, i.e 'm'. 3787 3788 If the *_nnz parameter is given then the *_nz parameter is ignored 3789 3790 The AIJ format (also called the Yale sparse matrix format or 3791 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3792 storage. The stored row and column indices begin with zero. 3793 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3794 3795 The parallel matrix is partitioned such that the first m0 rows belong to 3796 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3797 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3798 3799 The DIAGONAL portion of the local submatrix of a processor can be defined 3800 as the submatrix which is obtained by extraction the part corresponding to 3801 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3802 first row that belongs to the processor, r2 is the last row belonging to 3803 the this processor, and c1-c2 is range of indices of the local part of a 3804 vector suitable for applying the matrix to. This is an mxn matrix. In the 3805 common case of a square matrix, the row and column ranges are the same and 3806 the DIAGONAL part is also square. The remaining portion of the local 3807 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3808 3809 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3810 3811 You can call MatGetInfo() to get information on how effective the preallocation was; 3812 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3813 You can also run with the option -info and look for messages with the string 3814 malloc in them to see if additional memory allocation was needed. 3815 3816 Example usage: 3817 3818 Consider the following 8x8 matrix with 34 non-zero values, that is 3819 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3820 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3821 as follows: 3822 3823 .vb 3824 1 2 0 | 0 3 0 | 0 4 3825 Proc0 0 5 6 | 7 0 0 | 8 0 3826 9 0 10 | 11 0 0 | 12 0 3827 ------------------------------------- 3828 13 0 14 | 15 16 17 | 0 0 3829 Proc1 0 18 0 | 19 20 21 | 0 0 3830 0 0 0 | 22 23 0 | 24 0 3831 ------------------------------------- 3832 Proc2 25 26 27 | 0 0 28 | 29 0 3833 30 0 0 | 31 32 33 | 0 34 3834 .ve 3835 3836 This can be represented as a collection of submatrices as: 3837 3838 .vb 3839 A B C 3840 D E F 3841 G H I 3842 .ve 3843 3844 Where the submatrices A,B,C are owned by proc0, D,E,F are 3845 owned by proc1, G,H,I are owned by proc2. 3846 3847 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3848 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3849 The 'M','N' parameters are 8,8, and have the same values on all procs. 3850 3851 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3852 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3853 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3854 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3855 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 3856 matrix, and [DF] as another SeqAIJ matrix. 3857 3858 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3859 allocated for every row of the local diagonal submatrix, and o_nz 3860 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3861 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3862 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 3863 In this case, the values of d_nz,o_nz are: 3864 .vb 3865 proc0 : d_nz = 2, o_nz = 2 3866 proc1 : d_nz = 3, o_nz = 2 3867 proc2 : d_nz = 1, o_nz = 4 3868 .ve 3869 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3870 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3871 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3872 34 values. 3873 3874 When the d_nnz, o_nnz parameters are specified, the storage is specified 3875 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3876 In the above case the values for d_nnz,o_nnz are: 3877 .vb 3878 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3879 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3880 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3881 .ve 3882 Here the space allocated is the sum of all the above values, i.e. 34, and 3883 hence the preallocation is exact. 3884 3885 Level: intermediate 3886 3887 .keywords: matrix, aij, compressed row, sparse, parallel 3888 3889 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3890 MPIAIJ, MatGetInfo() 3891 @*/ 3892 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3893 { 3894 PetscErrorCode ierr; 3895 3896 PetscFunctionBegin; 3897 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3898 PetscValidType(B,1); 3899 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3900 PetscFunctionReturn(0); 3901 } 3902 3903 #undef __FUNCT__ 3904 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3905 /*@ 3906 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the 3907 local rows in standard CSR format.
3908 3909 Collective on MPI_Comm 3910 3911 Input Parameters: 3912 + comm - MPI communicator 3913 . m - number of local rows (Cannot be PETSC_DECIDE) 3914 . n - This value should be the same as the local size used in creating the 3915 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3916 calculated if N is given) For square matrices n is almost always m. 3917 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3918 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3919 . i - row indices 3920 . j - column indices 3921 - a - matrix values 3922 3923 Output Parameter: 3924 . mat - the matrix 3925 3926 Level: intermediate 3927 3928 Notes: 3929 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3930 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3931 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3932 3933 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3934 3935 The format which is used for the sparse matrix input, is equivalent to a 3936 row-major ordering.. i.e for the following matrix, the input data expected is 3937 as shown: 3938 3939 1 0 0 3940 2 0 3 P0 3941 ------- 3942 4 5 6 P1 3943 3944 Process0 [P0]: rows_owned=[0,1] 3945 i = {0,1,3} [size = nrow+1 = 2+1] 3946 j = {0,0,2} [size = nz = 6] 3947 v = {1,2,3} [size = nz = 6] 3948 3949 Process1 [P1]: rows_owned=[2] 3950 i = {0,3} [size = nrow+1 = 1+1] 3951 j = {0,1,2} [size = nz = 6] 3952 v = {4,5,6} [size = nz = 6] 3953 3954 .keywords: matrix, aij, compressed row, sparse, parallel 3955 3956 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3957 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3958 @*/ 3959 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3960 { 3961 PetscErrorCode ierr; 3962 3963 PetscFunctionBegin; 3964 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3965 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3966 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3967 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3968 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3969 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3970 PetscFunctionReturn(0); 3971 } 3972 3973 #undef __FUNCT__ 3974 #define __FUNCT__ "MatCreateAIJ" 3975 /*@C 3976 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3977 (the default parallel PETSc format). For good matrix assembly performance 3978 the user should preallocate the matrix storage by setting the parameters 3979 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3980 performance can be increased by more than a factor of 50. 3981 3982 Collective on MPI_Comm 3983 3984 Input Parameters: 3985 + comm - MPI communicator 3986 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3987 This value should be the same as the local size used in creating the 3988 y vector for the matrix-vector product y = Ax. 3989 . n - This value should be the same as the local size used in creating the 3990 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 3991 calculated if N is given) For square matrices n is almost always m. 3992 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3993 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3994 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3995 (same value is used for all local rows) 3996 . d_nnz - array containing the number of nonzeros in the various rows of the 3997 DIAGONAL portion of the local submatrix (possibly different for each row) 3998 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3999 The size of this array is equal to the number of local rows, i.e 'm'. 4000 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4001 submatrix (same value is used for all local rows). 4002 - o_nnz - array containing the number of nonzeros in the various rows of the 4003 OFF-DIAGONAL portion of the local submatrix (possibly different for 4004 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4005 structure. The size of this array is equal to the number 4006 of local rows, i.e 'm'. 4007 4008 Output Parameter: 4009 . A - the matrix 4010 4011 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4012 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4013 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4014 4015 Notes: 4016 If the *_nnz parameter is given then the *_nz parameter is ignored 4017 4018 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4019 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4020 storage requirements for this matrix. 4021 4022 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4023 processor than it must be used on all processors that share the object for 4024 that argument. 4025 4026 The user MUST specify either the local or global matrix dimensions 4027 (possibly both). 4028 4029 The parallel matrix is partitioned across processors such that the 4030 first m0 rows belong to process 0, the next m1 rows belong to 4031 process 1, the next m2 rows belong to process 2 etc.. where 4032 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4033 values corresponding to [m x N] submatrix. 4034 4035 The columns are logically partitioned with the n0 columns belonging 4036 to 0th partition, the next n1 columns belonging to the next 4037 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4038 4039 The DIAGONAL portion of the local submatrix on any given processor 4040 is the submatrix corresponding to the rows and columns m,n 4041 corresponding to the given processor. i.e diagonal matrix on 4042 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4043 etc. The remaining portion of the local submatrix [m x (N-n)] 4044 constitute the OFF-DIAGONAL portion. The example below better 4045 illustrates this concept. 4046 4047 For a square global matrix we define each processor's diagonal portion 4048 to be its local rows and the corresponding columns (a square submatrix); 4049 each processor's off-diagonal portion encompasses the remainder of the 4050 local matrix (a rectangular submatrix). 4051 4052 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4053 4054 When calling this routine with a single process communicator, a matrix of 4055 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4056 type of communicator, use the construction mechanism: 4057 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4058 4059 By default, this format uses inodes (identical nodes) when possible. 4060 We search for consecutive rows with the same nonzero structure, thereby 4061 reusing matrix information to achieve increased efficiency. 4062 4063 Options Database Keys: 4064 + -mat_no_inode - Do not use inodes 4065 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4066 - -mat_aij_oneindex - Internally use indexing starting at 1 4067 rather than 0. Note that when calling MatSetValues(), 4068 the user still MUST index entries starting at 0! 4069 4070 4071 Example usage: 4072 4073 Consider the following 8x8 matrix with 34 non-zero values, that is 4074 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 4075 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4076 as follows: 4077 4078 .vb 4079 1 2 0 | 0 3 0 | 0 4 4080 Proc0 0 5 6 | 7 0 0 | 8 0 4081 9 0 10 | 11 0 0 | 12 0 4082 ------------------------------------- 4083 13 0 14 | 15 16 17 | 0 0 4084 Proc1 0 18 0 | 19 20 21 | 0 0 4085 0 0 0 | 22 23 0 | 24 0 4086 ------------------------------------- 4087 Proc2 25 26 27 | 0 0 28 | 29 0 4088 30 0 0 | 31 32 33 | 0 34 4089 .ve 4090 4091 This can be represented as a collection of submatrices as: 4092 4093 .vb 4094 A B C 4095 D E F 4096 G H I 4097 .ve 4098 4099 Where the submatrices A,B,C are owned by proc0, D,E,F are 4100 owned by proc1, G,H,I are owned by proc2. 4101 4102 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4103 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4104 The 'M','N' parameters are 8,8, and have the same values on all procs. 4105 4106 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4107 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4108 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4109 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4110 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4111 matrix, and [DF] as another SeqAIJ matrix. 4112 4113 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4114 allocated for every row of the local diagonal submatrix, and o_nz 4115 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4116 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4117 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 4118 In this case, the values of d_nz,o_nz are: 4119 .vb 4120 proc0 : d_nz = 2, o_nz = 2 4121 proc1 : d_nz = 3, o_nz = 2 4122 proc2 : d_nz = 1, o_nz = 4 4123 .ve 4124 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4125 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4126 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4127 34 values. 4128 4129 When the d_nnz, o_nnz parameters are specified, the storage is specified 4130 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4131 In the above case the values for d_nnz,o_nnz are: 4132 .vb 4133 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4134 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4135 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4136 .ve 4137 Here the space allocated is the sum of all the above values, i.e. 34, and 4138 hence the preallocation is exact.
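
   A minimal call sequence sketch (the global size 8 and the uniform preallocation
   values 3 and 4 below are illustrative only, taken from the per-process maxima in
   the example above) might look like:

.vb
   Mat A;
   ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,4,PETSC_NULL,&A);CHKERRQ(ierr);
   ... calls to MatSetValues() ...
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve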
4139 4140 Level: intermediate 4141 4142 .keywords: matrix, aij, compressed row, sparse, parallel 4143 4144 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4145 MPIAIJ, MatCreateMPIAIJWithArrays() 4146 @*/ 4147 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4148 { 4149 PetscErrorCode ierr; 4150 PetscMPIInt size; 4151 4152 PetscFunctionBegin; 4153 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4154 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4155 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4156 if (size > 1) { 4157 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4158 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4159 } else { 4160 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4161 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4162 } 4163 PetscFunctionReturn(0); 4164 } 4165 4166 #undef __FUNCT__ 4167 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4168 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4169 { 4170 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4171 4172 PetscFunctionBegin; 4173 *Ad = a->A; 4174 *Ao = a->B; 4175 *colmap = a->garray; 4176 PetscFunctionReturn(0); 4177 } 4178 4179 #undef __FUNCT__ 4180 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4181 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4182 { 4183 PetscErrorCode ierr; 4184 PetscInt i; 4185 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4186 4187 PetscFunctionBegin; 4188 if (coloring->ctype == IS_COLORING_GLOBAL) { 4189 ISColoringValue *allcolors,*colors; 4190 ISColoring ocoloring; 4191 4192 /* set coloring for diagonal portion */ 4193 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4194 4195 /* set coloring for off-diagonal portion */ 4196 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4197 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4198 for (i=0; i<a->B->cmap->n; i++) { 4199 colors[i] = allcolors[a->garray[i]]; 4200 } 4201 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4202 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4203 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4204 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4205 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4206 ISColoringValue *colors; 4207 PetscInt *larray; 4208 ISColoring ocoloring; 4209 4210 /* set coloring for diagonal portion */ 4211 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4212 for (i=0; i<a->A->cmap->n; i++) { 4213 larray[i] = i + A->cmap->rstart; 4214 } 4215 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4216 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4217 for (i=0; i<a->A->cmap->n; i++) { 4218 colors[i] = coloring->colors[larray[i]]; 4219 } 4220 ierr = PetscFree(larray);CHKERRQ(ierr); 4221 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4222 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4223 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4224 4225 /* set coloring for off-diagonal portion */ 4226 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4227 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4228 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4229 for (i=0; i<a->B->cmap->n; i++) { 4230 colors[i] = coloring->colors[larray[i]]; 4231 } 4232 ierr = PetscFree(larray);CHKERRQ(ierr); 4233 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4234 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4235 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4236 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4237 4238 PetscFunctionReturn(0); 4239 } 4240 4241 #if defined(PETSC_HAVE_ADIC) 4242 #undef __FUNCT__ 4243 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4244 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4245 { 4246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4247 PetscErrorCode ierr; 4248 4249 PetscFunctionBegin; 4250 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4251 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4252 PetscFunctionReturn(0); 4253 } 4254 #endif 4255 4256 #undef __FUNCT__ 4257 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4258 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4259 { 4260 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4261 PetscErrorCode ierr; 4262 4263 PetscFunctionBegin; 4264 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4265 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4266 PetscFunctionReturn(0); 4267 } 4268 4269 #undef __FUNCT__ 4270 #define __FUNCT__ "MatMergeSymbolic" 4271 PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4272 { 4273 PetscErrorCode ierr; 4274 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum; 4275 PetscInt *indx; 4276 4277 PetscFunctionBegin; 4278 /* This routine will ONLY return MPIAIJ type matrix */ 4279 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4280 if (n == PETSC_DECIDE){ 4281 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4282 } 4283 /* Check sum(n) = N */ 4284 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPIU_SUM,comm);CHKERRQ(ierr); 4285 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4286 4287 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4288 rstart -= m; 4289 4290 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4291 for (i=0;i<m;i++) { 4292 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4293 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4294 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4295 } 4296 4297 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4298 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4299 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4300 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4301 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4302 PetscFunctionReturn(0); 4303 } 4304 4305 #undef __FUNCT__ 4306 #define __FUNCT__ "MatMergeNumeric" 4307 PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4308 { 4309 PetscErrorCode ierr; 4310 PetscInt m,N,i,rstart,nnz,Ii; 4311 PetscInt *indx; 4312 PetscScalar *values; 4313 4314 PetscFunctionBegin; 4315 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4316 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4317 for 
(i=0;i<m;i++) { 4318 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4319 Ii = i + rstart; 4320 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4321 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4322 } 4323 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4324 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4325 PetscFunctionReturn(0); 4326 } 4327 4328 #undef __FUNCT__ 4329 #define __FUNCT__ "MatMerge" 4330 /*@ 4331 MatMerge - Creates a single large PETSc matrix by concatenating sequential 4332 matrices from each processor 4333 4334 Collective on MPI_Comm 4335 4336 Input Parameters: 4337 + comm - the communicators the parallel matrix will live on 4338 . inmat - the input sequential matrices 4339 . n - number of local columns (or PETSC_DECIDE) 4340 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4341 4342 Output Parameter: 4343 . outmat - the parallel matrix generated 4344 4345 Level: advanced 4346 4347 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4348 4349 @*/ 4350 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4351 { 4352 PetscErrorCode ierr; 4353 4354 PetscFunctionBegin; 4355 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4356 if (scall == MAT_INITIAL_MATRIX){ 4357 ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4358 } 4359 ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4360 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4361 PetscFunctionReturn(0); 4362 } 4363 4364 #undef __FUNCT__ 4365 #define __FUNCT__ "MatFileSplit" 4366 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4367 { 4368 PetscErrorCode ierr; 4369 PetscMPIInt rank; 4370 PetscInt m,N,i,rstart,nnz; 4371 size_t len; 4372 const PetscInt *indx; 4373 PetscViewer out; 4374 char *name; 4375 Mat B; 4376 const PetscScalar *values; 4377 4378 PetscFunctionBegin; 4379 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4380 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4381 /* Should this be the type of the diagonal block of A? 
*/ 4382 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4383 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4384 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4385 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4386 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4387 for (i=0;i<m;i++) { 4388 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4389 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4390 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4391 } 4392 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4393 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4394 4395 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4396 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4397 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4398 sprintf(name,"%s.%d",outfile,rank); 4399 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4400 ierr = PetscFree(name); 4401 ierr = MatView(B,out);CHKERRQ(ierr); 4402 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4403 ierr = MatDestroy(&B);CHKERRQ(ierr); 4404 PetscFunctionReturn(0); 4405 } 4406 4407 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4408 #undef __FUNCT__ 4409 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4410 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4411 { 4412 PetscErrorCode ierr; 4413 Mat_Merge_SeqsToMPI *merge; 4414 PetscContainer container; 4415 4416 PetscFunctionBegin; 4417 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4418 if (container) { 4419 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4420 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4421 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4423 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4425 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4426 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4427 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4428 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4429 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4430 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4431 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4432 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4433 ierr = PetscFree(merge);CHKERRQ(ierr); 4434 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4435 } 4436 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4437 PetscFunctionReturn(0); 4438 } 4439 4440 #include <../src/mat/utils/freespace.h> 4441 #include <petscbt.h> 4442 4443 #undef __FUNCT__ 4444 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4445 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4446 { 4447 PetscErrorCode ierr; 4448 MPI_Comm comm=((PetscObject)mpimat)->comm; 4449 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4450 PetscMPIInt size,rank,taga,*len_s; 4451 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4452 PetscInt proc,m; 4453 PetscInt **buf_ri,**buf_rj; 4454 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4455 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4456 MPI_Request *s_waits,*r_waits; 4457 MPI_Status *status; 4458 MatScalar *aa=a->a; 4459 MatScalar **abuf_r,*ba_i; 4460 Mat_Merge_SeqsToMPI *merge; 4461 PetscContainer container; 4462 4463 PetscFunctionBegin; 4464 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4465 4466 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4467 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4468 4469 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4470 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4471 4472 bi = merge->bi; 4473 bj = merge->bj; 4474 buf_ri = merge->buf_ri; 4475 buf_rj = merge->buf_rj; 4476 4477 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4478 owners = merge->rowmap->range; 4479 len_s = merge->len_s; 4480 4481 /* send and recv matrix values */ 4482 /*-----------------------------*/ 4483 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4484 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4485 4486 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4487 for (proc=0,k=0; proc<size; proc++){ 4488 if (!len_s[proc]) continue; 4489 i = owners[proc]; 4490 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4491 k++; 4492 } 4493 4494 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4495 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4496 ierr = PetscFree(status);CHKERRQ(ierr); 4497 4498 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4499 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4500 4501 /* insert mat values of mpimat */ 4502 /*----------------------------*/ 4503 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4504 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4505 4506 for (k=0; k<merge->nrecv; k++){ 4507 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4508 nrows = *(buf_ri_k[k]); 4509 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4510 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */ 4511 } 4512 4513 /* set values of ba */ 4514 m = merge->rowmap->n; 4515 for (i=0; i<m; i++) { 4516 arow = owners[rank] + i; 4517 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4518 bnzi = bi[i+1] - bi[i]; 4519 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4520 4521 /* add local non-zero vals of this proc's seqmat into ba */ 4522 anzi = ai[arow+1] - ai[arow]; 4523 aj = a->j + ai[arow]; 4524 aa = a->a + ai[arow]; 4525 nextaj = 0; 4526 for (j=0; nextaj<anzi; j++){ 4527 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4528 ba_i[j] += aa[nextaj++]; 4529 } 4530 } 4531 4532 /* add received vals into ba */ 4533 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4534 /* i-th row */ 4535 if (i == *nextrow[k]) { 4536 anzi = *(nextai[k]+1) - *nextai[k]; 4537 aj = buf_rj[k] + *(nextai[k]); 4538 aa = abuf_r[k] + *(nextai[k]); 4539 nextaj = 0; 4540 for (j=0; nextaj<anzi; j++){ 4541 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4542 ba_i[j] += aa[nextaj++]; 4543 } 4544 } 4545 nextrow[k]++; nextai[k]++; 4546 } 4547 } 4548 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4549 } 4550 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4551 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4552 4553 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4554 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4555 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4556 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4557 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4558 PetscFunctionReturn(0); 4559 } 4560 4561 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4562 4563 #undef __FUNCT__ 4564 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4565 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4566 { 4567 PetscErrorCode ierr; 4568 Mat B_mpi; 4569 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4570 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4571 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4572 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4573 PetscInt len,proc,*dnz,*onz; 4574 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4575 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4576 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4577 MPI_Status *status; 4578 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4579 PetscBT lnkbt; 4580 Mat_Merge_SeqsToMPI *merge; 4581 PetscContainer container; 4582 4583 PetscFunctionBegin; 4584 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4585 4586 /* make sure it is a PETSc comm */ 4587 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4588 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4589 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4590 4591 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4592 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4593 4594 /* determine row ownership */ 4595 /*---------------------------------------------------------*/ 4596 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4597 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4598 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4599 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4600 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4601 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4602 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4603 4604 m = merge->rowmap->n; 4605 M = merge->rowmap->N; 4606 owners = merge->rowmap->range; 4607 4608 /* determine the number of messages to send, their lengths */ 4609 /*---------------------------------------------------------*/ 4610 len_s = merge->len_s; 4611 4612 len = 0; /* length of buf_si[] */ 4613 merge->nsend = 0; 4614 for (proc=0; proc<size; proc++){ 4615 len_si[proc] = 0; 4616 if (proc == rank){ 4617 len_s[proc] = 0; 4618 } else { 4619 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4620 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4621 } 4622 if (len_s[proc]) { 4623 merge->nsend++; 4624 nrows = 0; 4625 for (i=owners[proc]; i<owners[proc+1]; i++){ 4626 if (ai[i+1] > ai[i]) nrows++; 4627 } 4628 len_si[proc] = 2*(nrows+1); 4629 len += len_si[proc]; 4630 } 4631 } 4632 4633 /* determine the number and length of messages to receive for ij-structure */ 4634 /*-------------------------------------------------------------------------*/ 4635 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4636 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4637 4638 /* post the Irecv of j-structure */ 4639 /*-------------------------------*/ 4640 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4641 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4642 4643 /* post the Isend of j-structure */ 4644 /*--------------------------------*/ 4645 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4646 4647 for (proc=0, k=0; proc<size; proc++){ 4648 if (!len_s[proc]) continue; 4649 i = owners[proc]; 4650 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4651 k++; 4652 } 4653 4654 /* receives and sends of j-structure are complete */ 4655 /*------------------------------------------------*/ 4656 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4657 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4658 4659 /* send and recv i-structure */ 4660 /*---------------------------*/ 4661 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4662 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4663 4664 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4665 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4666 for (proc=0,k=0; proc<size; proc++){ 4667 if (!len_s[proc]) continue; 4668 /* form outgoing message for i-structure: 4669 buf_si[0]: nrows to be sent 4670 [1:nrows]: row index (global) 4671 [nrows+1:2*nrows+1]: i-structure index 4672 */ 4673 /*-------------------------------------------*/ 4674 nrows = len_si[proc]/2 - 1; 4675 buf_si_i = buf_si + nrows+1; 4676 buf_si[0] = nrows; 4677 buf_si_i[0] = 0; 4678 nrows = 0; 4679 for (i=owners[proc]; i<owners[proc+1]; i++){ 4680 anzi = ai[i+1] - ai[i]; 4681 if (anzi) { 4682 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4683 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4684 nrows++; 4685 } 4686 } 4687 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4688 k++; 4689 buf_si += len_si[proc]; 4690 } 4691 4692 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4693 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4694 4695 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4696 for (i=0; i<merge->nrecv; i++){ 4697 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4698 } 4699 4700 ierr = PetscFree(len_si);CHKERRQ(ierr); 4701 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4702 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4703 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4704 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4705 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4706 ierr = PetscFree(status);CHKERRQ(ierr); 4707 4708 /* compute a local seq matrix in each processor */ 4709 /*----------------------------------------------*/ 4710 /* allocate bi array and free space for accumulating nonzero column info */ 4711 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4712 bi[0] = 0; 4713 4714 /* create and initialize a linked list */ 4715 nlnk = N+1; 4716 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4717 4718 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4719 len = 0; 4720 len = ai[owners[rank+1]] - ai[owners[rank]]; 4721 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4722 current_space = free_space; 4723 4724 /* determine symbolic info for each local row */ 4725 ierr = 
PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4726 4727 for (k=0; k<merge->nrecv; k++){ 4728 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4729 nrows = *buf_ri_k[k]; 4730 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4731 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */ 4732 } 4733 4734 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4735 len = 0; 4736 for (i=0;i<m;i++) { 4737 bnzi = 0; 4738 /* add local non-zero cols of this proc's seqmat into lnk */ 4739 arow = owners[rank] + i; 4740 anzi = ai[arow+1] - ai[arow]; 4741 aj = a->j + ai[arow]; 4742 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4743 bnzi += nlnk; 4744 /* add received col data into lnk */ 4745 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4746 if (i == *nextrow[k]) { /* i-th row */ 4747 anzi = *(nextai[k]+1) - *nextai[k]; 4748 aj = buf_rj[k] + *nextai[k]; 4749 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4750 bnzi += nlnk; 4751 nextrow[k]++; nextai[k]++; 4752 } 4753 } 4754 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4755 4756 /* if free space is not available, make more free space */ 4757 if (current_space->local_remaining<bnzi) { 4758 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4759 nspacedouble++; 4760 } 4761 /* copy data into free space, then initialize lnk */ 4762 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4763 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4764 4765 current_space->array += bnzi; 4766 current_space->local_used += bnzi; 4767 current_space->local_remaining -= bnzi; 4768 4769 bi[i+1] = bi[i] + bnzi; 4770 } 4771 4772 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4773 4774 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4775 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4776 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4777 4778 /* create symbolic parallel matrix B_mpi */ 4779 /*---------------------------------------*/ 4780 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4781 if (n==PETSC_DECIDE) { 4782 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4783 } else { 4784 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4785 } 4786 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4787 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4788 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4789 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4790 4791 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4792 B_mpi->assembled = PETSC_FALSE; 4793 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4794 merge->bi = bi; 4795 merge->bj = bj; 4796 merge->buf_ri = buf_ri; 4797 merge->buf_rj = buf_rj; 4798 merge->coi = PETSC_NULL; 4799 merge->coj = PETSC_NULL; 4800 merge->owners_co = PETSC_NULL; 4801 4802 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4803 4804 /* attach the supporting struct to B_mpi for reuse */ 4805 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4806 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4807 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4808 ierr =
PetscContainerDestroy(&container);CHKERRQ(ierr); 4809 *mpimat = B_mpi; 4810 4811 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4812 PetscFunctionReturn(0); 4813 } 4814 4815 #undef __FUNCT__ 4816 #define __FUNCT__ "MatMerge_SeqsToMPI" 4817 /*@C 4818 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential 4819 matrices from each processor 4820 4821 Collective on MPI_Comm 4822 4823 Input Parameters: 4824 + comm - the communicator the parallel matrix will live on 4825 . seqmat - the input sequential matrix (one per process) 4826 . m - number of local rows (or PETSC_DECIDE) 4827 . n - number of local columns (or PETSC_DECIDE) 4828 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4829 4830 Output Parameter: 4831 . mpimat - the parallel matrix generated 4832 4833 Level: advanced 4834 4835 Notes: 4836 The dimensions of the sequential matrix on each processor MUST be the same. 4837 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4838 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4839 @*/ 4840 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4841 { 4842 PetscErrorCode ierr; 4843 PetscMPIInt size; 4844 4845 PetscFunctionBegin; 4846 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4847 if (size == 1){ 4848 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4849 if (scall == MAT_INITIAL_MATRIX){ 4850 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4851 } else { 4852 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4853 } 4854 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4855 PetscFunctionReturn(0); 4856 } 4857 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4858 if (scall == MAT_INITIAL_MATRIX){ 4859 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4860 } 4861 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4862 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4863 PetscFunctionReturn(0); 4864 } 4865 4866 #undef __FUNCT__ 4867 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4868 /*@ 4869 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4870 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4871 with MatGetSize(). 4872 4873 Not Collective 4874 4875 Input Parameters: 4876 + A - the matrix 4877 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4878 4879 Output Parameter: 4880 . 
A_loc - the local sequential matrix generated 4881 4882 Level: developer 4883 4884 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4885 4886 @*/ 4887 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4888 { 4889 PetscErrorCode ierr; 4890 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4891 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4892 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4893 MatScalar *aa=a->a,*ba=b->a,*cam; 4894 PetscScalar *ca; 4895 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4896 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4897 PetscBool match; 4898 4899 PetscFunctionBegin; 4900 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4901 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4902 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4903 if (scall == MAT_INITIAL_MATRIX){ 4904 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4905 ci[0] = 0; 4906 for (i=0; i<am; i++){ 4907 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4908 } 4909 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4910 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4911 k = 0; 4912 for (i=0; i<am; i++) { 4913 ncols_o = bi[i+1] - bi[i]; 4914 ncols_d = ai[i+1] - ai[i]; 4915 /* off-diagonal portion of A */ 4916 for (jo=0; jo<ncols_o; jo++) { 4917 col = cmap[*bj]; 4918 if (col >= cstart) break; 4919 cj[k] = col; bj++; 4920 ca[k++] = *ba++; 4921 } 4922 /* diagonal portion of A */ 4923 for (j=0; j<ncols_d; j++) { 4924 cj[k] = cstart + *aj++; 4925 ca[k++] = *aa++; 4926 } 4927 /* off-diagonal portion of A */ 4928 for (j=jo; j<ncols_o; j++) { 4929 cj[k] = cmap[*bj++]; 4930 ca[k++] = *ba++; 4931 } 4932 } 4933 /* put together the new matrix */ 4934 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4935 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4936 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4937 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4938 mat->free_a = PETSC_TRUE; 4939 mat->free_ij = PETSC_TRUE; 4940 mat->nonew = 0; 4941 } else if (scall == MAT_REUSE_MATRIX){ 4942 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4943 ci = mat->i; cj = mat->j; cam = mat->a; 4944 for (i=0; i<am; i++) { 4945 /* off-diagonal portion of A */ 4946 ncols_o = bi[i+1] - bi[i]; 4947 for (jo=0; jo<ncols_o; jo++) { 4948 col = cmap[*bj]; 4949 if (col >= cstart) break; 4950 *cam++ = *ba++; bj++; 4951 } 4952 /* diagonal portion of A */ 4953 ncols_d = ai[i+1] - ai[i]; 4954 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4955 /* off-diagonal portion of A */ 4956 for (j=jo; j<ncols_o; j++) { 4957 *cam++ = *ba++; bj++; 4958 } 4959 } 4960 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4961 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4962 PetscFunctionReturn(0); 4963 } 4964 4965 #undef __FUNCT__ 4966 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4967 /*@C 4968 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4969 4970 Not Collective 4971 4972 Input Parameters: 4973 + A - the matrix 4974 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4975 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4976 4977 Output Parameter: 4978 . 
A_loc - the local sequential matrix generated 4979 4980 Level: developer 4981 4982 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4983 4984 @*/ 4985 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4986 { 4987 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4988 PetscErrorCode ierr; 4989 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4990 IS isrowa,iscola; 4991 Mat *aloc; 4992 PetscBool match; 4993 4994 PetscFunctionBegin; 4995 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4996 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4997 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4998 if (!row){ 4999 start = A->rmap->rstart; end = A->rmap->rend; 5000 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5001 } else { 5002 isrowa = *row; 5003 } 5004 if (!col){ 5005 start = A->cmap->rstart; 5006 cmap = a->garray; 5007 nzA = a->A->cmap->n; 5008 nzB = a->B->cmap->n; 5009 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5010 ncols = 0; 5011 for (i=0; i<nzB; i++) { 5012 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5013 else break; 5014 } 5015 imark = i; 5016 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5017 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5018 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5019 } else { 5020 iscola = *col; 5021 } 5022 if (scall != MAT_INITIAL_MATRIX){ 5023 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5024 aloc[0] = *A_loc; 5025 } 5026 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5027 *A_loc = aloc[0]; 5028 ierr = PetscFree(aloc);CHKERRQ(ierr); 5029 if (!row){ 5030 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5031 } 5032 if (!col){ 5033 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5034 } 5035 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5036 PetscFunctionReturn(0); 5037 } 5038 5039 #undef __FUNCT__ 5040 #define __FUNCT__ "MatGetBrowsOfAcols" 5041 /*@C 5042 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5044 Collective on Mat 5045 5046 Input Parameters: 5047 + A,B - the matrices in mpiaij format 5048 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5049 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5050 5051 Output Parameter: 5052 + rowb, colb - index sets of rows and columns of B to extract 5053 - B_seq - the sequential matrix generated 5054 5055 Level: developer 5056 5057 @*/ 5058 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5059 { 5060 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5061 PetscErrorCode ierr; 5062 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5063 IS isrowb,iscolb; 5064 Mat *bseq=PETSC_NULL; 5065 5066 PetscFunctionBegin; 5067 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5068 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5069 } 5070 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5071 5072 if (scall == MAT_INITIAL_MATRIX){ 5073 start = A->cmap->rstart; 5074 cmap = a->garray; 5075 nzA = a->A->cmap->n; 5076 nzB = a->B->cmap->n; 5077 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5078 ncols = 0; 5079 for (i=0; i<nzB; i++) { /* row < local row index */ 5080 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5081 else break; 5082 } 5083 imark = i; 5084 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5085 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5086 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5087 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5088 } else { 5089 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5090 isrowb = *rowb; iscolb = *colb; 5091 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5092 bseq[0] = *B_seq; 5093 } 5094 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5095 *B_seq = bseq[0]; 5096 ierr = PetscFree(bseq);CHKERRQ(ierr); 5097 if (!rowb){ 5098 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5099 } else { 5100 *rowb = isrowb; 5101 } 5102 if (!colb){ 5103 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5104 } else { 5105 *colb = iscolb; 5106 } 5107 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5108 PetscFunctionReturn(0); 5109 } 5110 5111 #undef __FUNCT__ 5112 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5113 /* 5114 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5115 of the OFF-DIAGONAL portion of local A 5116 5117 Collective on Mat 5118 5119 Input Parameters: 5120 + A,B - the matrices in mpiaij format 5121 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5122 5123 Output Parameter: 5124 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5125 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5126 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5127 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5128 5129 Level: developer 5130 5131 */ 5132 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5133 { 5134 VecScatter_MPI_General *gen_to,*gen_from; 5135 PetscErrorCode ierr; 5136 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5137 Mat_SeqAIJ *b_oth; 5138 VecScatter ctx=a->Mvctx; 5139 MPI_Comm comm=((PetscObject)ctx)->comm; 5140 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5141 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5142 PetscScalar *rvalues,*svalues; 5143 MatScalar *b_otha,*bufa,*bufA; 5144 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5145 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5146 MPI_Status *sstatus,rstatus; 5147 PetscMPIInt jj; 5148 PetscInt *cols,sbs,rbs; 5149 PetscScalar *vals; 5150 5151 PetscFunctionBegin; 5152 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5153 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5154 } 5155 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5156 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5157 5158 gen_to = (VecScatter_MPI_General*)ctx->todata; 5159 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5160 rvalues = gen_from->values; /* holds the length of receiving row */ 5161 svalues = gen_to->values; /* holds the length of sending row */ 5162 nrecvs = gen_from->n; 5163 nsends = gen_to->n; 5164 5165 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5166 srow = gen_to->indices; /* local row index to be sent */ 5167 sstarts = gen_to->starts; 5168 sprocs = gen_to->procs; 5169 sstatus = gen_to->sstatus; 5170 sbs = gen_to->bs; 5171 rstarts = gen_from->starts; 5172 rprocs = gen_from->procs; 5173 rbs = gen_from->bs; 5174 5175 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5176 if (scall == MAT_INITIAL_MATRIX){ 5177 /* i-array */ 5178 /*---------*/ 5179 /* post receives */ 5180 for (i=0; i<nrecvs; i++){ 5181 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5182 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5183 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5184 } 5185 5186 /* pack the outgoing message */ 5187 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5188 sstartsj[0] = 0; rstartsj[0] = 0; 5189 len = 0; /* total length of j or a array to be sent */ 5190 k = 0; 5191 for (i=0; i<nsends; i++){ 5192 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5193 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5194 for (j=0; j<nrows; j++) { 5195 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5196 for (l=0; l<sbs; l++){ 5197 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5198 rowlen[j*sbs+l] = ncols; 5199 len += ncols; 5200 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5201 } 5202 k++; 5203 } 5204 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5205 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5206 } 5207 /* 
recvs and sends of i-array are completed */ 5208 i = nrecvs; 5209 while (i--) { 5210 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5211 } 5212 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5213 5214 /* allocate buffers for sending j and a arrays */ 5215 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5216 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5217 5218 /* create i-array of B_oth */ 5219 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5220 b_othi[0] = 0; 5221 len = 0; /* total length of j or a array to be received */ 5222 k = 0; 5223 for (i=0; i<nrecvs; i++){ 5224 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5225 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5226 for (j=0; j<nrows; j++) { 5227 b_othi[k+1] = b_othi[k] + rowlen[j]; 5228 len += rowlen[j]; k++; 5229 } 5230 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5231 } 5232 5233 /* allocate space for j and a arrays of B_oth */ 5234 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5235 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5236 5237 /* j-array */ 5238 /*---------*/ 5239 /* post receives of j-array */ 5240 for (i=0; i<nrecvs; i++){ 5241 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5242 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5243 } 5244 5245 /* pack the outgoing message j-array */ 5246 k = 0; 5247 for (i=0; i<nsends; i++){ 5248 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5249 bufJ = bufj+sstartsj[i]; 5250 for (j=0; j<nrows; j++) { 5251 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5252 for (ll=0; ll<sbs; ll++){ 5253 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5254 for (l=0; l<ncols; l++){ 5255 *bufJ++ = cols[l]; 5256 } 5257 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5258 } 5259 } 5260 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5261 } 5262 5263 /* recvs and sends of j-array are completed */ 5264 i = nrecvs; 5265 while (i--) { 5266 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5267 } 5268 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5269 } else if (scall == MAT_REUSE_MATRIX){ 5270 sstartsj = *startsj_s; 5271 rstartsj = *startsj_r; 5272 bufa = *bufa_ptr; 5273 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5274 b_otha = b_oth->a; 5275 } else { 5276 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 5277 } 5278 5279 /* a-array */ 5280 /*---------*/ 5281 /* post receives of a-array */ 5282 for (i=0; i<nrecvs; i++){ 5283 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5284 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5285 } 5286 5287 /* pack the outgoing message a-array */ 5288 k = 0; 5289 for (i=0; i<nsends; i++){ 5290 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5291 bufA = bufa+sstartsj[i]; 5292 for (j=0; j<nrows; j++) { 5293 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5294 for (ll=0; ll<sbs; ll++){ 5295 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5296 for (l=0; l<ncols; l++){ 5297 *bufA++ = vals[l]; 5298 } 5299 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5300 } 5301 } 5302 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5303 } 5304 /* recvs and sends of a-array are completed */ 5305 i = nrecvs; 5306 while (i--) { 5307 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5308 } 5309 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5310 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5311 5312 if (scall == MAT_INITIAL_MATRIX){ 5313 /* put together the new matrix */ 5314 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5315 5316 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5317 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5318 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5319 b_oth->free_a = PETSC_TRUE; 5320 b_oth->free_ij = PETSC_TRUE; 5321 b_oth->nonew = 0; 5322 5323 ierr = PetscFree(bufj);CHKERRQ(ierr); 5324 if (!startsj_s || !bufa_ptr){ 5325 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5326 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5327 } else { 5328 *startsj_s = sstartsj; 5329 *startsj_r = rstartsj; 5330 *bufa_ptr = bufa; 5331 } 5332 } 5333 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5334 PetscFunctionReturn(0); 5335 } 5336 5337 #undef __FUNCT__ 5338 #define __FUNCT__ "MatGetCommunicationStructs" 5339 /*@C 5340 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5341 5342 Not Collective 5343 5344 Input Parameters: 5345 . A - The matrix in mpiaij format 5346 5347 Output Parameter: 5348 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5349 . 
colmap - A map from global column index to local index into lvec 5350 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5351 5352 Level: developer 5353 5354 @*/ 5355 #if defined (PETSC_USE_CTABLE) 5356 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5357 #else 5358 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5359 #endif 5360 { 5361 Mat_MPIAIJ *a; 5362 5363 PetscFunctionBegin; 5364 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5365 PetscValidPointer(lvec, 2); 5366 PetscValidPointer(colmap, 3); 5367 PetscValidPointer(multScatter, 4); 5368 a = (Mat_MPIAIJ *) A->data; 5369 if (lvec) *lvec = a->lvec; 5370 if (colmap) *colmap = a->colmap; 5371 if (multScatter) *multScatter = a->Mvctx; 5372 PetscFunctionReturn(0); 5373 } 5374 5375 EXTERN_C_BEGIN 5376 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5377 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5378 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5379 EXTERN_C_END 5380 5381 #undef __FUNCT__ 5382 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5383 /* 5384 Computes (B'*A')' since computing B*A directly is untenable 5385 5386 n p p 5387 ( ) ( ) ( ) 5388 m ( A ) * n ( B ) = m ( C ) 5389 ( ) ( ) ( ) 5390 5391 */ 5392 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5393 { 5394 PetscErrorCode ierr; 5395 Mat At,Bt,Ct; 5396 5397 PetscFunctionBegin; 5398 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5399 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5400 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5401 ierr = MatDestroy(&At);CHKERRQ(ierr); 5402 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5403 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5404 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5405 PetscFunctionReturn(0); 5406 } 5407 5408 #undef __FUNCT__ 5409 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5410 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5411 { 5412 PetscErrorCode ierr; 5413 PetscInt m=A->rmap->n,n=B->cmap->n; 5414 Mat Cmat; 5415 5416 PetscFunctionBegin; 5417 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5418 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5419 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5420 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5421 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5422 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5423 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5424 *C = Cmat; 5425 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5426 PetscFunctionReturn(0); 5427 } 5428 5429 /* ----------------------------------------------------------------*/ 5430 #undef __FUNCT__ 5431 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5432 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5433 { 5434 PetscErrorCode ierr; 5435 5436 PetscFunctionBegin; 5437 if (scall == MAT_INITIAL_MATRIX){ 5438 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5439 } 5440 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5441 PetscFunctionReturn(0); 5442 } 5443 5444 EXTERN_C_BEGIN 5445 #if 
defined(PETSC_HAVE_MUMPS) 5446 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5447 #endif 5448 #if defined(PETSC_HAVE_PASTIX) 5449 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5450 #endif 5451 #if defined(PETSC_HAVE_SUPERLU_DIST) 5452 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5453 #endif 5454 #if defined(PETSC_HAVE_SPOOLES) 5455 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5456 #endif 5457 EXTERN_C_END 5458 5459 /*MC 5460 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5461 5462 Options Database Keys: 5463 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5464 5465 Level: beginner 5466 5467 .seealso: MatCreateAIJ() 5468 M*/ 5469 5470 EXTERN_C_BEGIN 5471 #undef __FUNCT__ 5472 #define __FUNCT__ "MatCreate_MPIAIJ" 5473 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5474 { 5475 Mat_MPIAIJ *b; 5476 PetscErrorCode ierr; 5477 PetscMPIInt size; 5478 5479 PetscFunctionBegin; 5480 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5481 5482 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5483 B->data = (void*)b; 5484 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5485 B->rmap->bs = 1; 5486 B->assembled = PETSC_FALSE; 5487 5488 B->insertmode = NOT_SET_VALUES; 5489 b->size = size; 5490 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5491 5492 /* build cache for off array entries formed */ 5493 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5494 b->donotstash = PETSC_FALSE; 5495 b->colmap = 0; 5496 b->garray = 0; 5497 b->roworiented = PETSC_TRUE; 5498 5499 /* stuff used for matrix vector multiply */ 5500 b->lvec = PETSC_NULL; 5501 b->Mvctx = PETSC_NULL; 5502 5503 /* stuff for MatGetRow() */ 5504 b->rowindices = 0; 5505 b->rowvalues = 0; 5506 b->getrowactive = PETSC_FALSE; 5507 5508 #if defined(PETSC_HAVE_SPOOLES) 5509 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5510 "MatGetFactor_mpiaij_spooles", 5511 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5512 #endif 5513 #if defined(PETSC_HAVE_MUMPS) 5514 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5515 "MatGetFactor_aij_mumps", 5516 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5517 #endif 5518 #if defined(PETSC_HAVE_PASTIX) 5519 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5520 "MatGetFactor_mpiaij_pastix", 5521 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5522 #endif 5523 #if defined(PETSC_HAVE_SUPERLU_DIST) 5524 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5525 "MatGetFactor_mpiaij_superlu_dist", 5526 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5527 #endif 5528 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5529 "MatStoreValues_MPIAIJ", 5530 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5531 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5532 "MatRetrieveValues_MPIAIJ", 5533 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5534 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5535 "MatGetDiagonalBlock_MPIAIJ", 5536 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5537 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5538 "MatIsTranspose_MPIAIJ", 5539 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5540 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5541 "MatMPIAIJSetPreallocation_MPIAIJ", 5542 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5543 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5544 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5545 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5546 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5547 "MatDiagonalScaleLocal_MPIAIJ", 5548 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5549 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5550 "MatConvert_MPIAIJ_MPIAIJPERM", 5551 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5552 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5553 "MatConvert_MPIAIJ_MPIAIJCRL", 5554 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5555 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5556 "MatConvert_MPIAIJ_MPISBAIJ", 5557 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5558 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5559 "MatMatMult_MPIDense_MPIAIJ", 5560 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5561 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5562 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5563 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5564 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5565 "MatMatMultNumeric_MPIDense_MPIAIJ", 5566 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5567 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5568 PetscFunctionReturn(0); 5569 } 5570 EXTERN_C_END 5571 5572 #undef __FUNCT__ 5573 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5574 /*@ 5575 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5576 and "off-diagonal" part of the matrix in CSR format. 5577 5578 Collective on MPI_Comm 5579 5580 Input Parameters: 5581 + comm - MPI communicator 5582 . m - number of local rows (Cannot be PETSC_DECIDE) 5583 . n - This value should be the same as the local size used in creating the 5584 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 5585 calculated if N is given). For square matrices n is almost always m. 5586 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5587 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5588 . i - row indices for "diagonal" portion of matrix 5589 . j - column indices 5590 . a - matrix values 5591 . oi - row indices for "off-diagonal" portion of matrix 5592 . oj - column indices 5593 - oa - matrix values 5594 5595 Output Parameter: 5596 . mat - the matrix 5597 5598 Level: advanced 5599 5600 Notes: 5601 The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5602 must free the arrays once the matrix has been destroyed and not before. 5603 5604 The i and j indices are 0 based 5605 5606 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5607 5608 This sets local rows and cannot be used to set off-processor values. 5609 5610 You cannot later use MatSetValues() to change values in this matrix.
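
   Example usage (an illustrative sketch only, not taken from the PETSc documentation; the 2 x 4 layout,
   the per-process arrays, and the variable names below are assumptions made for this example). Suppose
   two processes each own one row and two columns of the global matrix, with
       row 0 (process 0): 1.0 in column 0 and 2.0 in column 3
       row 1 (process 1): 4.0 in column 2 and 3.0 in column 1
   The "diagonal" column indices are local to the diagonal block, while the "off-diagonal" column
   indices are global, matching how this routine hands the arrays to MatCreateSeqAIJWithArrays():

       PetscInt    di[2] = {0,1},dj[1],oi[2] = {0,1},oj[1];
       PetscScalar da[1],oa[1];
       Mat         A;

       if (!rank) {dj[0] = 0; da[0] = 1.0; oj[0] = 3; oa[0] = 2.0;}
       else       {dj[0] = 0; da[0] = 4.0; oj[0] = 1; oa[0] = 3.0;}
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,2,2,4,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);

   All six arrays must remain valid until A has been destroyed, as noted above.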
5611 5612 .keywords: matrix, aij, compressed row, sparse, parallel 5613 5614 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5615 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5616 @*/ 5617 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5618 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5619 { 5620 PetscErrorCode ierr; 5621 Mat_MPIAIJ *maij; 5622 5623 PetscFunctionBegin; 5624 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5625 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5626 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5627 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5628 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5629 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5630 maij = (Mat_MPIAIJ*) (*mat)->data; 5631 maij->donotstash = PETSC_TRUE; 5632 (*mat)->preallocated = PETSC_TRUE; 5633 5634 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5635 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5636 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5637 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5638 5639 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5640 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5641 5642 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5643 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5644 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5645 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5646 5647 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5648 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5649 PetscFunctionReturn(0); 5650 } 5651 5652 /* 5653 Special version for direct calls from Fortran 5654 */ 5655 #include <private/fortranimpl.h> 5656 5657 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5658 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5659 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5660 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5661 #endif 5662 5663 /* Change these macros so can be used in void function */ 5664 #undef CHKERRQ 5665 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5666 #undef SETERRQ2 5667 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5668 #undef SETERRQ3 5669 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5670 #undef SETERRQ 5671 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5672 5673 EXTERN_C_BEGIN 5674 #undef __FUNCT__ 5675 #define __FUNCT__ "matsetvaluesmpiaij_" 5676 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5677 { 5678 Mat mat = *mmat; 5679 PetscInt m = *mm, n = *mn; 5680 InsertMode addv = *maddv; 5681 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5682 PetscScalar value; 5683 PetscErrorCode ierr; 5684 5685 MatCheckPreallocated(mat,1); 5686 if (mat->insertmode == NOT_SET_VALUES) { 5687 mat->insertmode = addv; 5688 } 5689 #if defined(PETSC_USE_DEBUG) 5690 else if (mat->insertmode != addv) { 5691 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert 
values"); 5692 } 5693 #endif 5694 { 5695 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5696 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5697 PetscBool roworiented = aij->roworiented; 5698 5699 /* Some Variables required in the macro */ 5700 Mat A = aij->A; 5701 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5702 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5703 MatScalar *aa = a->a; 5704 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5705 Mat B = aij->B; 5706 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5707 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5708 MatScalar *ba = b->a; 5709 5710 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5711 PetscInt nonew = a->nonew; 5712 MatScalar *ap1,*ap2; 5713 5714 PetscFunctionBegin; 5715 for (i=0; i<m; i++) { 5716 if (im[i] < 0) continue; 5717 #if defined(PETSC_USE_DEBUG) 5718 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5719 #endif 5720 if (im[i] >= rstart && im[i] < rend) { 5721 row = im[i] - rstart; 5722 lastcol1 = -1; 5723 rp1 = aj + ai[row]; 5724 ap1 = aa + ai[row]; 5725 rmax1 = aimax[row]; 5726 nrow1 = ailen[row]; 5727 low1 = 0; 5728 high1 = nrow1; 5729 lastcol2 = -1; 5730 rp2 = bj + bi[row]; 5731 ap2 = ba + bi[row]; 5732 rmax2 = bimax[row]; 5733 nrow2 = bilen[row]; 5734 low2 = 0; 5735 high2 = nrow2; 5736 5737 for (j=0; j<n; j++) { 5738 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5739 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5740 if (in[j] >= cstart && in[j] < cend){ 5741 col = in[j] - cstart; 5742 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5743 } else if (in[j] < 0) continue; 5744 #if defined(PETSC_USE_DEBUG) 5745 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5746 #endif 5747 else { 5748 if (mat->was_assembled) { 5749 if (!aij->colmap) { 5750 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5751 } 5752 #if defined (PETSC_USE_CTABLE) 5753 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5754 col--; 5755 #else 5756 col = aij->colmap[in[j]] - 1; 5757 #endif 5758 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5759 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5760 col = in[j]; 5761 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5762 B = aij->B; 5763 b = (Mat_SeqAIJ*)B->data; 5764 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5765 rp2 = bj + bi[row]; 5766 ap2 = ba + bi[row]; 5767 rmax2 = bimax[row]; 5768 nrow2 = bilen[row]; 5769 low2 = 0; 5770 high2 = nrow2; 5771 bm = aij->B->rmap->n; 5772 ba = b->a; 5773 } 5774 } else col = in[j]; 5775 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5776 } 5777 } 5778 } else { 5779 if (!aij->donotstash) { 5780 if (roworiented) { 5781 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5782 } else { 5783 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5784 } 5785 } 5786 } 5787 }} 5788 PetscFunctionReturnVoid(); 5789 } 5790 EXTERN_C_END 5791 5792