1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 
PetscInt i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. 
    Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only process 0 holds the global sequential matrix; check its type there */
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create the distributed matrix and determine the ownership ranges */
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    /* turn the gathered per-process row counts into a prefix-sum ownership range */
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      /* ld[i] = number of entries of row i strictly left of the diagonal block */
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* process 0 uses the global matrix's own arrays directly (no copy) */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens temporarily becomes the diagonal-only counts */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to the full row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    /* only non-root processes allocated their own copies of the arrays */
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* stash ld on the matrix; the MAT_REUSE_MATRIX branch below needs it */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      /* keep the original pointer: gmataa is advanced during the copy below */
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       within each row the incoming (global-ordering) values split as
       [left-of-diagonal-block | diagonal block | right-of-diagonal-block],
       with ld[] giving the left-of-diagonal counts */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* trailing part of previous row's B values plus leading part of this row's */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right-of-diagonal values of the last local row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it it is not scalable (each processor
    has an order N integer array but is fast to access.
 */
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;  /* number of local off-diagonal columns */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash table maps (global column + 1) -> (local column + 1); the +1 shift
     lets 0 mean "column not present locally" */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense array of global length N: colmap[global] = local + 1; 0 means absent */
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Inserts or adds (row,col,value) into the diagonal block A.  Does a
   binary-then-linear search within the sorted row; if the entry is new it
   shifts later entries up (reallocating the row if needed).  Relies on the
   caller having set up rp1/ap1/low1/high1/nrow1/rmax1/lastcol1 etc. and
   declares the label a_noinsert, so it may be expanded only once per scope.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


/*
   Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B,
   using the rp2/ap2/... caller-scope variables and the b_noinsert label.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Replaces the numerical values of one locally owned row, given the values
   for the whole row in global column order.  The incoming array v is split
   into [left-of-diagonal-block | diagonal block | right-of-diagonal-block]
   and copied into the B, A, B storage respectively.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr =
PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts/adds an m-by-n logically dense block of values.  Locally owned
   rows go straight into the A (diagonal) or B (off-diagonal) sequential
   blocks via the MatSetValues_SeqAIJ_*_Private() macros; rows owned by
   other processes are stashed and communicated during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the search state used by both macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column lies in the diagonal block */
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* off-diagonal block; after assembly, columns must be translated
             through colmap to B's compressed local numbering */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* the column is not yet in B's pattern: fall back to global
                 numbering by disassembling */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* off-process row: stash it for communication at assembly time */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values into v (row-major).  Only locally
   owned rows are supported; requested entries outside the local nonzero
   pattern of the off-diagonal block come back as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate the global column via colmap */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not in B's local pattern: the entry is a structural zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   Starts assembly: verifies all processes agree on insert vs. add mode and
   begins scattering the stashed off-process entries to their owners.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash (inserting received off-process
   values locally), assembles the A and B blocks, handles collective
   disassembly if any process disassembled, and on first final assembly
   sets up the scatter needed by MatMult.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode
MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* zero both the diagonal (A) and off-diagonal (B) blocks */
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeros the given global rows (optionally placing diag on the diagonal and
   fixing the right-hand side b from x).  Each process may list rows it does
   not own; the row indices are first routed to their owners with
   point-to-point messages, then each process zeros its local rows.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for process j, nprocs[2*j+1] = 1 iff any */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* the owner search resumes from the previous hit while rows[] is
       nondecreasing, restarting only when the sequence goes backwards */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    /* user promised all listed rows are local; skip the PetscMaxSum reduction */
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* the bucketing above advanced starts[]; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert to local row numbering */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: must place diag via MatSetValues(), which
       requires the matrix to allow new nonzero locations */
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm
comm = ((PetscObject)A)->comm; 886 MPI_Request *send_waits,*recv_waits; 887 MPI_Status recv_status,*send_status; 888 const PetscScalar *xx; 889 PetscScalar *bb,*mask; 890 Vec xmask,lmask; 891 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 892 const PetscInt *aj, *ii,*ridx; 893 PetscScalar *aa; 894 #if defined(PETSC_DEBUG) 895 PetscBool found = PETSC_FALSE; 896 #endif 897 898 PetscFunctionBegin; 899 /* first count number of contributors to each processor */ 900 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 901 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 902 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 903 j = 0; 904 for (i=0; i<N; i++) { 905 if (lastidx > (idx = rows[i])) j = 0; 906 lastidx = idx; 907 for (; j<size; j++) { 908 if (idx >= owners[j] && idx < owners[j+1]) { 909 nprocs[2*j]++; 910 nprocs[2*j+1] = 1; 911 owner[i] = j; 912 #if defined(PETSC_DEBUG) 913 found = PETSC_TRUE; 914 #endif 915 break; 916 } 917 } 918 #if defined(PETSC_DEBUG) 919 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 920 found = PETSC_FALSE; 921 #endif 922 } 923 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 924 925 /* inform other processors of number of messages and max length*/ 926 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 927 928 /* post receives: */ 929 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 930 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 931 for (i=0; i<nrecvs; i++) { 932 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 933 } 934 935 /* do sends: 936 1) starts[i] gives the starting index in svalues for stuff going to 937 the ith processor 938 */ 939 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 940 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 941 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 942 starts[0] = 0; 943 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 944 for (i=0; i<N; i++) { 945 svalues[starts[owner[i]]++] = rows[i]; 946 } 947 948 starts[0] = 0; 949 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 950 count = 0; 951 for (i=0; i<size; i++) { 952 if (nprocs[2*i+1]) { 953 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 954 } 955 } 956 ierr = PetscFree(starts);CHKERRQ(ierr); 957 958 base = owners[rank]; 959 960 /* wait on receives */ 961 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 962 count = nrecvs; slen = 0; 963 while (count) { 964 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 965 /* unpack receives into our local space */ 966 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 967 source[imdex] = recv_status.MPI_SOURCE; 968 lens[imdex] = n; 969 slen += n; 970 count--; 971 } 972 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 973 974 /* move the data into the send scatter */ 975 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 976 count = 0; 977 for (i=0; i<nrecvs; i++) { 978 values = rvalues + i*nmax; 979 for (j=0; j<lens[i]; j++) { 980 lrows[count++] = values[j] - base; 981 } 982 } 983 ierr = PetscFree(rvalues);CHKERRQ(ierr); 984 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 985 ierr = PetscFree(owner);CHKERRQ(ierr); 986 ierr = PetscFree(nprocs);CHKERRQ(ierr); 987 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 988 989 /* zero diagonal part of matrix */ 990 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 991 992 /* handle off diagonal part of matrix */ 993 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 994 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 995 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 996 for (i=0; i<slen; i++) { 997 
bb[lrows[i]] = 1; 998 } 999 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1003 if (x) { 1004 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1007 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1008 } 1009 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1010 1011 /* remove zeroed rows of off diagonal matrix */ 1012 ii = aij->i; 1013 for (i=0; i<slen; i++) { 1014 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1015 } 1016 1017 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1018 if (aij->compressedrow.use){ 1019 m = aij->compressedrow.nrows; 1020 ii = aij->compressedrow.i; 1021 ridx = aij->compressedrow.rindex; 1022 for (i=0; i<m; i++){ 1023 n = ii[i+1] - ii[i]; 1024 aj = aij->j + ii[i]; 1025 aa = aij->a + ii[i]; 1026 1027 for (j=0; j<n; j++) { 1028 if (PetscAbsScalar(mask[*aj])) { 1029 if (b) bb[*ridx] -= *aa*xx[*aj]; 1030 *aa = 0.0; 1031 } 1032 aa++; 1033 aj++; 1034 } 1035 ridx++; 1036 } 1037 } else { /* do not use compressed row format */ 1038 m = l->B->rmap->n; 1039 for (i=0; i<m; i++) { 1040 n = ii[i+1] - ii[i]; 1041 aj = aij->j + ii[i]; 1042 aa = aij->a + ii[i]; 1043 for (j=0; j<n; j++) { 1044 if (PetscAbsScalar(mask[*aj])) { 1045 if (b) bb[i] -= *aa*xx[*aj]; 1046 *aa = 0.0; 1047 } 1048 aa++; 1049 aj++; 1050 } 1051 } 1052 } 1053 if (x) { 1054 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1056 } 1057 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1058 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1059 ierr = 
PetscFree(lrows);CHKERRQ(ierr); 1060 1061 /* wait on sends */ 1062 if (nsends) { 1063 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1064 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1065 ierr = PetscFree(send_status);CHKERRQ(ierr); 1066 } 1067 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1068 ierr = PetscFree(svalues);CHKERRQ(ierr); 1069 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatMult_MPIAIJ" 1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 PetscInt nt; 1080 1081 PetscFunctionBegin; 1082 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1083 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1084 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1094 { 1095 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1113 ierr = 
VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);  /* zz += B*ghost(xx) */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/* yy = A^T * xx: the off-diagonal transpose product produces ghost contributions
   in a->lvec which are scattered back (SCATTER_REVERSE, ADD_VALUES) into yy; the
   order of Begin/End vs. the local product depends on whether the scatter's send
   and receive phases are merged */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/* sets *f = PETSC_TRUE iff Bmat equals Amat^T to tolerance tol: first the cheap
   local test on the diagonal blocks, then (in parallel) a comparison of the
   extracted off-diagonal sub-matrices */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices outside this process's ownership range [first,last)
     NOTE(review): the allocation is sized with N but the second fill loop runs to M;
     these agree only for square matrices -- confirm callers guarantee M == N */
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* A(Me,Notme) should equal B(Notme,Me)^T */
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/* zz = yy + A^T * xx; off-diagonal transpose product is scattered in reverse into
   zz while the local transpose-add proceeds */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr =
(*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
/* v = diagonal of A; simply delegates to the sequential diagonal block after
   verifying the row and column partitions coincide */
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* A *= aa: scale both the diagonal and off-diagonal sequential blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/* release everything owned by the Mat_MPIAIJ data structure (blocks, scatter,
   ghost vector, column map, work arrays) and detach the composed functions so a
   subsequent MatSetType() starts from a clean object */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* clear the function slots composed by MatCreate_MPIAIJ and friends */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
/* write the parallel matrix to a binary viewer: rank 0 writes the header and then
   streams header, row lengths, column indices and values from every process using
   the flow-control helpers to bound memory use */
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt
message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;  /* local nonzeros = diagonal block + off-diagonal block */
  if (!rank) {
    /* rank 0 writes the file header: classid, global rows, global cols, total nz */
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much buffer space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* emit each row in ascending global column order: B's columns left of the
       diagonal block, then A's columns shifted to global by cstart, then the
       remaining B columns to the right of the diagonal block */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      /* NOTE(review): the condition tests rnz but the message formats nz -- the
         reported value is misleading when this error fires */
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* same B-left / A / B-right interleaving as for the column indices above */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr =
PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/* view the matrix on ASCII/draw/socket viewers; the info formats print per-process
   statistics, otherwise the whole matrix is assembled onto process 0 and viewed
   through the singleton viewer */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* NOTE(review): a PetscBool is passed where a PetscInt* output is expected;
         only its zero/non-zero state is consulted below -- confirm against the
         MatInodeGetInodeSizes() calling convention */
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);  /* everything lands on rank 0 */
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    /* temporarily shift the stored column indices to global numbering; restored below */
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;  /* keep the base pointer; cols itself is advanced in the loop */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}  /* local -> global cols */
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
      /* NOTE(review): PetscStrcpy return code is intentionally unchecked here */
      PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/* top-level MatView dispatcher for MPIAIJ: all supported viewer types funnel into
   MatView_MPIAIJ_ASCIIorDraworSocket */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary ||
issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
/* SOR / Gauss-Seidel relaxation for MPIAIJ: only the "local" sweep variants (and
   Eisenstat) are supported; each outer iteration scatters the current iterate's
   ghost values, folds the off-diagonal contribution into a modified rhs bb1, and
   then runs the sequential SOR kernel on the diagonal block */
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  /* bb1 is needed whenever a sweep uses a nonzero current iterate; note that
     `~flag & SOR_ZERO_INITIAL_GUESS` is (bitwise-NOT flag) masked by the bit,
     i.e. true exactly when the zero-initial-guess bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since x starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* lazily cache the diagonal; reused on subsequent calls */
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/* *B = A(rowp,colp): builds the inverted global row permutation, extracts this
   process's slice of it, inverts the (sequential) column permutation and then
   delegates to MatGetSubMatrix_MPIAIJ_Private */
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(&crowp);CHKERRQ(ierr);
  }
  /* NOTE(review): growp does not appear to be destroyed in this chunk -- possible
     IS leak; confirm against the rest of the file */
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(&irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(&icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/* accumulate MatInfo over the diagonal (A) and off-diagonal (B) blocks into the
   isend[] array for the subsequent reduction (function continues past this chunk) */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - Applies a MatOption to the parallel matrix, usually
   by forwarding it to both sequential blocks (A = diagonal, B = off-diagonal).
   Symmetry-related options only make sense for the diagonal block.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options that apply identically to both blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;   /* also recorded here, used by MatSetValues_MPIAIJ */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      /* SPD implies (structural) symmetry */
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  /* symmetry options are only forwarded to the diagonal block */
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally-owned row with global column
   indices, merging the diagonal-block (A) and off-diagonal-block (B)
   entries into a single run sorted by increasing global column.

   Work arrays mat->rowvalues/rowindices are allocated once (sized for the
   longest local row) and reused; MatRestoreRow_MPIAIJ only clears the
   getrowactive flag.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* pass NULL pointers for the pieces the caller did not ask for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;   /* maps B's compressed local columns to global columns */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted.
         imark = number of B entries whose global column lies left of the
         diagonal block; layout is  B[0..imark) | A[0..nzA) | B[imark..nzB) */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed while filling the values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/*
   MatRestoreRow_MPIAIJ - Companion to MatGetRow_MPIAIJ(); the work arrays
   are cached on the matrix, so only the re-entrancy flag is cleared here.
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum) or
   infinity- (max row sum) norm of a parallel AIJ matrix by combining the
   entries of the diagonal (A) and off-diagonal (B) blocks and reducing
   across the communicator.  NORM_2 is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: everything lives in the diagonal block */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squared moduli over both blocks, then global sum and sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* tmp holds partial column sums over ALL global columns; summed globally below */
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* A stores local columns; shift by cstart to get the global column */
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* B stores compressed columns; garray maps them to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        /* each local row is split across A and B; sum both pieces */
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix by
   inserting each local column of A (with global indices) and of B into a
   freshly preallocated matrix with MatSetValues(), then assembling.
   In-place transpose (reuse with *matout == A) requires a square matrix.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute
d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part: each local row of A becomes a global column of B */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part (translate compressed columns through garray first) */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place: move B's guts into A and discard the shell */
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll)*mat*diag(rr).
   The scatter of rr into the ghost vector is started before the local
   scaling so communication overlaps with computation.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
/* Sets the block size on both sequential blocks and both layouts. */
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* Clears the factored state; only the diagonal block carries factorization. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Two MPIAIJ matrices are equal iff both their diagonal
   and off-diagonal blocks are equal on every process (logical AND reduce).
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* NOTE(review): reduces a PetscBool with MPI_INT — assumes sizeof(PetscBool)
     == sizeof(int); confirm (newer PETSc uses MPIU_BOOL here) */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/*
   MatCopy_MPIAIJ - Copies A into B; fast block-wise copy only when the
   nonzero patterns match and both matrices use this same implementation.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/* Default preallocation when the user supplied none. */
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN   : direct BLAS axpy on both blocks' value arrays.
   SUBSET_NONZERO_PATTERN : uses (and caches) an x-to-y index map for the
                            off-diagonal block.
   otherwise              : builds a new matrix with merged preallocation and
                            replaces Y's internals.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* invalidate a cached map that was built against a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* NOTE(review): MatHeaderReplace return code is not CHKERRQ-checked here */
    ierr = MatHeaderReplace(Y,B);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
/* Complex-conjugates all entries; a no-op for real scalars. */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
/* Replaces every entry by its real part, block by block. */
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
/* Replaces every entry by its imaginary part, block by block. */
PetscErrorCode
MatImaginaryPart_MPIAIJ(Mat A) 2298 { 2299 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2300 PetscErrorCode ierr; 2301 2302 PetscFunctionBegin; 2303 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2304 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #ifdef PETSC_HAVE_PBGL 2309 2310 #include <boost/parallel/mpi/bsp_process_group.hpp> 2311 #include <boost/graph/distributed/ilu_default_graph.hpp> 2312 #include <boost/graph/distributed/ilu_0_block.hpp> 2313 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2314 #include <boost/graph/distributed/petsc/interface.hpp> 2315 #include <boost/multi_array.hpp> 2316 #include <boost/parallel/distributed_property_map->hpp> 2317 2318 #undef __FUNCT__ 2319 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2320 /* 2321 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2322 */ 2323 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2324 { 2325 namespace petsc = boost::distributed::petsc; 2326 2327 namespace graph_dist = boost::graph::distributed; 2328 using boost::graph::distributed::ilu_default::process_group_type; 2329 using boost::graph::ilu_permuted; 2330 2331 PetscBool row_identity, col_identity; 2332 PetscContainer c; 2333 PetscInt m, n, M, N; 2334 PetscErrorCode ierr; 2335 2336 PetscFunctionBegin; 2337 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2338 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2339 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2340 if (!row_identity || !col_identity) { 2341 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2342 } 2343 2344 process_group_type pg; 2345 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2346 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, 
pg)); 2347 lgraph_type& level_graph = *lgraph_p; 2348 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2349 2350 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2351 ilu_permuted(level_graph); 2352 2353 /* put together the new matrix */ 2354 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2355 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2356 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2357 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2358 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2359 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 2362 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2363 ierr = PetscContainerSetPointer(c, lgraph_p); 2364 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2365 ierr = PetscContainerDestroy(&c); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2371 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2372 { 2373 PetscFunctionBegin; 2374 PetscFunctionReturn(0); 2375 } 2376 2377 #undef __FUNCT__ 2378 #define __FUNCT__ "MatSolve_MPIAIJ" 2379 /* 2380 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2381 */ 2382 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2383 { 2384 namespace graph_dist = boost::graph::distributed; 2385 2386 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2387 lgraph_type* lgraph_p; 2388 PetscContainer c; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2393 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2394 ierr = VecCopy(b, x);CHKERRQ(ierr); 2395 2396 PetscScalar* array_x; 2397 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2398 PetscInt sx; 2399 
ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2400 2401 PetscScalar* array_b; 2402 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2403 PetscInt sb; 2404 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2405 2406 lgraph_type& level_graph = *lgraph_p; 2407 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2408 2409 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2410 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2411 ref_x(array_x, boost::extents[num_vertices(graph)]); 2412 2413 typedef boost::iterator_property_map<array_ref_type::iterator, 2414 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2415 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2416 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2417 2418 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2419 2420 PetscFunctionReturn(0); 2421 } 2422 #endif 2423 2424 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2425 PetscInt nzlocal,nsends,nrecvs; 2426 PetscMPIInt *send_rank,*recv_rank; 2427 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2428 PetscScalar *sbuf_a,**rbuf_a; 2429 PetscErrorCode (*Destroy)(Mat); 2430 } Mat_Redundant; 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2434 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2435 { 2436 PetscErrorCode ierr; 2437 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2438 PetscInt i; 2439 2440 PetscFunctionBegin; 2441 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2442 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2443 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2444 for (i=0; i<redund->nrecvs; i++){ 2445 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2446 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2447 } 2448 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2449 ierr = 
PetscFree(redund);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatDestroy_MatRedundant" 2455 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2456 { 2457 PetscErrorCode ierr; 2458 PetscContainer container; 2459 Mat_Redundant *redund=PETSC_NULL; 2460 2461 PetscFunctionBegin; 2462 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2463 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2464 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2465 A->ops->destroy = redund->Destroy; 2466 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2467 if (A->ops->destroy) { 2468 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2469 } 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2475 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2476 { 2477 PetscMPIInt rank,size; 2478 MPI_Comm comm=((PetscObject)mat)->comm; 2479 PetscErrorCode ierr; 2480 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2481 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2482 PetscInt *rowrange=mat->rmap->range; 2483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2484 Mat A=aij->A,B=aij->B,C=*matredundant; 2485 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2486 PetscScalar *sbuf_a; 2487 PetscInt nzlocal=a->nz+b->nz; 2488 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2489 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2490 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2491 MatScalar *aworkA,*aworkB; 2492 PetscScalar *vals; 2493 PetscMPIInt tag1,tag2,tag3,imdex; 2494 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2495 
*r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2496 MPI_Status recv_status,*send_status; 2497 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2498 PetscInt **rbuf_j=PETSC_NULL; 2499 PetscScalar **rbuf_a=PETSC_NULL; 2500 Mat_Redundant *redund=PETSC_NULL; 2501 PetscContainer container; 2502 2503 PetscFunctionBegin; 2504 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2505 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2506 2507 if (reuse == MAT_REUSE_MATRIX) { 2508 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2509 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2510 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2511 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2512 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2513 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2514 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2515 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2516 2517 nsends = redund->nsends; 2518 nrecvs = redund->nrecvs; 2519 send_rank = redund->send_rank; 2520 recv_rank = redund->recv_rank; 2521 sbuf_nz = redund->sbuf_nz; 2522 rbuf_nz = redund->rbuf_nz; 2523 sbuf_j = redund->sbuf_j; 2524 sbuf_a = redund->sbuf_a; 2525 rbuf_j = redund->rbuf_j; 2526 rbuf_a = redund->rbuf_a; 2527 } 2528 2529 if (reuse == MAT_INITIAL_MATRIX){ 2530 PetscMPIInt subrank,subsize; 2531 PetscInt nleftover,np_subcomm; 2532 /* get the destination processors' id send_rank, nsends and nrecvs */ 2533 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2534 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2535 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2536 np_subcomm = size/nsubcomm; 2537 nleftover = size - nsubcomm*np_subcomm; 2538 nsends = 0; nrecvs = 0; 2539 for (i=0; i<size; i++){ /* i=rank*/ 2540 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2541 send_rank[nsends] = i; nsends++; 2542 recv_rank[nrecvs++] = i; 2543 } 2544 } 2545 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2546 i = size-nleftover-1; 2547 j = 0; 2548 while (j < nsubcomm - nleftover){ 2549 send_rank[nsends++] = i; 2550 i--; j++; 2551 } 2552 } 2553 2554 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2555 for (i=0; i<nleftover; i++){ 2556 recv_rank[nrecvs++] = size-nleftover+i; 2557 } 2558 } 2559 2560 /* allocate sbuf_j, sbuf_a */ 2561 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2562 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2563 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2564 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2565 2566 /* copy mat's local entries into the buffers */ 2567 if (reuse == MAT_INITIAL_MATRIX){ 2568 rownz_max = 0; 2569 rptr = sbuf_j; 2570 cols = sbuf_j + rend-rstart + 1; 2571 vals = sbuf_a; 2572 rptr[0] = 0; 2573 for (i=0; 
i<rend-rstart; i++){ 2574 row = i + rstart; 2575 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2576 ncols = nzA + nzB; 2577 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2578 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2579 /* load the column indices for this row into cols */ 2580 lwrite = 0; 2581 for (l=0; l<nzB; l++) { 2582 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2583 vals[lwrite] = aworkB[l]; 2584 cols[lwrite++] = ctmp; 2585 } 2586 } 2587 for (l=0; l<nzA; l++){ 2588 vals[lwrite] = aworkA[l]; 2589 cols[lwrite++] = cstart + cworkA[l]; 2590 } 2591 for (l=0; l<nzB; l++) { 2592 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2593 vals[lwrite] = aworkB[l]; 2594 cols[lwrite++] = ctmp; 2595 } 2596 } 2597 vals += ncols; 2598 cols += ncols; 2599 rptr[i+1] = rptr[i] + ncols; 2600 if (rownz_max < ncols) rownz_max = ncols; 2601 } 2602 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2603 } else { /* only copy matrix values into sbuf_a */ 2604 rptr = sbuf_j; 2605 vals = sbuf_a; 2606 rptr[0] = 0; 2607 for (i=0; i<rend-rstart; i++){ 2608 row = i + rstart; 2609 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2610 ncols = nzA + nzB; 2611 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2612 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2613 lwrite = 0; 2614 for (l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2616 } 2617 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2618 for (l=0; l<nzB; l++) { 2619 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2620 } 2621 vals += ncols; 2622 rptr[i+1] = rptr[i] + ncols; 2623 } 2624 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2625 2626 /* send nzlocal to others, and recv other's nzlocal */ 2627 /*--------------------------------------------------*/ 2628 if (reuse == MAT_INITIAL_MATRIX){ 2629 ierr = PetscMalloc2(3*(nsends + 
nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2630 s_waits2 = s_waits3 + nsends; 2631 s_waits1 = s_waits2 + nsends; 2632 r_waits1 = s_waits1 + nsends; 2633 r_waits2 = r_waits1 + nrecvs; 2634 r_waits3 = r_waits2 + nrecvs; 2635 } else { 2636 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2637 r_waits3 = s_waits3 + nsends; 2638 } 2639 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2641 if (reuse == MAT_INITIAL_MATRIX){ 2642 /* get new tags to keep the communication clean */ 2643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2644 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2645 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2646 2647 /* post receives of other's nzlocal */ 2648 for (i=0; i<nrecvs; i++){ 2649 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2650 } 2651 /* send nzlocal to others */ 2652 for (i=0; i<nsends; i++){ 2653 sbuf_nz[i] = nzlocal; 2654 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2655 } 2656 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2657 count = nrecvs; 2658 while (count) { 2659 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2660 recv_rank[imdex] = recv_status.MPI_SOURCE; 2661 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2662 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2663 2664 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2665 rbuf_nz[imdex] += i + 2; 2666 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2667 ierr = 
MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2668 count--; 2669 } 2670 /* wait on sends of nzlocal */ 2671 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2672 /* send mat->i,j to others, and recv from other's */ 2673 /*------------------------------------------------*/ 2674 for (i=0; i<nsends; i++){ 2675 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2676 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2677 } 2678 /* wait on receives of mat->i,j */ 2679 /*------------------------------*/ 2680 count = nrecvs; 2681 while (count) { 2682 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2683 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2684 count--; 2685 } 2686 /* wait on sends of mat->i,j */ 2687 /*---------------------------*/ 2688 if (nsends) { 2689 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2690 } 2691 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2692 2693 /* post receives, send and receive mat->a */ 2694 /*----------------------------------------*/ 2695 for (imdex=0; imdex<nrecvs; imdex++) { 2696 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2697 } 2698 for (i=0; i<nsends; i++){ 2699 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2700 } 2701 count = nrecvs; 2702 while (count) { 2703 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2704 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2705 count--; 2706 } 2707 if (nsends) { 2708 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2709 } 2710 2711 ierr = 
PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2712 2713 /* create redundant matrix */ 2714 /*-------------------------*/ 2715 if (reuse == MAT_INITIAL_MATRIX){ 2716 /* compute rownz_max for preallocation */ 2717 for (imdex=0; imdex<nrecvs; imdex++){ 2718 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2719 rptr = rbuf_j[imdex]; 2720 for (i=0; i<j; i++){ 2721 ncols = rptr[i+1] - rptr[i]; 2722 if (rownz_max < ncols) rownz_max = ncols; 2723 } 2724 } 2725 2726 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2727 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2728 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2729 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2730 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2731 } else { 2732 C = *matredundant; 2733 } 2734 2735 /* insert local matrix entries */ 2736 rptr = sbuf_j; 2737 cols = sbuf_j + rend-rstart + 1; 2738 vals = sbuf_a; 2739 for (i=0; i<rend-rstart; i++){ 2740 row = i + rstart; 2741 ncols = rptr[i+1] - rptr[i]; 2742 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2743 vals += ncols; 2744 cols += ncols; 2745 } 2746 /* insert received matrix entries */ 2747 for (imdex=0; imdex<nrecvs; imdex++){ 2748 rstart = rowrange[recv_rank[imdex]]; 2749 rend = rowrange[recv_rank[imdex]+1]; 2750 rptr = rbuf_j[imdex]; 2751 cols = rbuf_j[imdex] + rend-rstart + 1; 2752 vals = rbuf_a[imdex]; 2753 for (i=0; i<rend-rstart; i++){ 2754 row = i + rstart; 2755 ncols = rptr[i+1] - rptr[i]; 2756 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2757 vals += ncols; 2758 cols += ncols; 2759 } 2760 } 2761 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2762 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2763 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2764 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant 
mat size %d != input mat size %d",M,mat->rmap->N); 2765 if (reuse == MAT_INITIAL_MATRIX) { 2766 PetscContainer container; 2767 *matredundant = C; 2768 /* create a supporting struct and attach it to C for reuse */ 2769 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2770 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2771 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2772 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2773 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2774 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2775 2776 redund->nzlocal = nzlocal; 2777 redund->nsends = nsends; 2778 redund->nrecvs = nrecvs; 2779 redund->send_rank = send_rank; 2780 redund->recv_rank = recv_rank; 2781 redund->sbuf_nz = sbuf_nz; 2782 redund->rbuf_nz = rbuf_nz; 2783 redund->sbuf_j = sbuf_j; 2784 redund->sbuf_a = sbuf_a; 2785 redund->rbuf_j = rbuf_j; 2786 redund->rbuf_a = rbuf_a; 2787 2788 redund->Destroy = C->ops->destroy; 2789 C->ops->destroy = MatDestroy_MatRedundant; 2790 } 2791 PetscFunctionReturn(0); 2792 } 2793 2794 #undef __FUNCT__ 2795 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2796 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2797 { 2798 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2799 PetscErrorCode ierr; 2800 PetscInt i,*idxb = 0; 2801 PetscScalar *va,*vb; 2802 Vec vtmp; 2803 2804 PetscFunctionBegin; 2805 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2806 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2807 if (idx) { 2808 for (i=0; i<A->rmap->n; i++) { 2809 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2810 } 2811 } 2812 2813 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2814 if (idx) { 2815 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2816 } 2817 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2818 ierr = 
VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2819 2820 for (i=0; i<A->rmap->n; i++){ 2821 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2822 va[i] = vb[i]; 2823 if (idx) idx[i] = a->garray[idxb[i]]; 2824 } 2825 } 2826 2827 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2828 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2829 ierr = PetscFree(idxb);CHKERRQ(ierr); 2830 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2831 PetscFunctionReturn(0); 2832 } 2833 2834 #undef __FUNCT__ 2835 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2836 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2837 { 2838 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2839 PetscErrorCode ierr; 2840 PetscInt i,*idxb = 0; 2841 PetscScalar *va,*vb; 2842 Vec vtmp; 2843 2844 PetscFunctionBegin; 2845 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2846 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2847 if (idx) { 2848 for (i=0; i<A->cmap->n; i++) { 2849 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2850 } 2851 } 2852 2853 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2854 if (idx) { 2855 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2856 } 2857 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2858 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2859 2860 for (i=0; i<A->rmap->n; i++){ 2861 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2862 va[i] = vb[i]; 2863 if (idx) idx[i] = a->garray[idxb[i]]; 2864 } 2865 } 2866 2867 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2868 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2869 ierr = PetscFree(idxb);CHKERRQ(ierr); 2870 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2871 PetscFunctionReturn(0); 2872 } 2873 2874 #undef __FUNCT__ 2875 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2876 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2877 { 2878 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2879 PetscInt n = A->rmap->n; 2880 PetscInt cstart = A->cmap->rstart; 2881 PetscInt *cmap = mat->garray; 2882 PetscInt 
*diagIdx, *offdiagIdx; 2883 Vec diagV, offdiagV; 2884 PetscScalar *a, *diagA, *offdiagA; 2885 PetscInt r; 2886 PetscErrorCode ierr; 2887 2888 PetscFunctionBegin; 2889 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2890 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2891 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2892 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2893 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2894 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2895 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2896 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2897 for(r = 0; r < n; ++r) { 2898 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2899 a[r] = diagA[r]; 2900 idx[r] = cstart + diagIdx[r]; 2901 } else { 2902 a[r] = offdiagA[r]; 2903 idx[r] = cmap[offdiagIdx[r]]; 2904 } 2905 } 2906 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2907 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2908 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2909 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2910 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2911 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2912 PetscFunctionReturn(0); 2913 } 2914 2915 #undef __FUNCT__ 2916 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2917 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2918 { 2919 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2920 PetscInt n = A->rmap->n; 2921 PetscInt cstart = A->cmap->rstart; 2922 PetscInt *cmap = mat->garray; 2923 PetscInt *diagIdx, *offdiagIdx; 2924 Vec diagV, offdiagV; 2925 PetscScalar *a, *diagA, *offdiagA; 2926 PetscInt r; 2927 PetscErrorCode ierr; 2928 2929 PetscFunctionBegin; 2930 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2931 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2932 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2933 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2934 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2935 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2936 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2937 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2938 for(r = 0; r < n; ++r) { 2939 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2940 a[r] = diagA[r]; 2941 idx[r] = cstart + diagIdx[r]; 2942 } else { 2943 a[r] = offdiagA[r]; 2944 idx[r] = cmap[offdiagIdx[r]]; 2945 } 2946 } 2947 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2948 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2949 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2950 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2951 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2952 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2953 PetscFunctionReturn(0); 2954 } 2955 2956 #undef __FUNCT__ 2957 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2958 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2959 { 2960 PetscErrorCode ierr; 2961 Mat *dummy; 2962 2963 PetscFunctionBegin; 2964 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2965 *newmat = *dummy; 2966 ierr = PetscFree(dummy);CHKERRQ(ierr); 2967 PetscFunctionReturn(0); 2968 } 2969 2970 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2974 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2975 { 2976 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2977 PetscErrorCode ierr; 2978 2979 PetscFunctionBegin; 2980 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2981 PetscFunctionReturn(0); 2982 } 2983 2984 2985 /* -------------------------------------------------------------------*/ 2986 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2987 
MatGetRow_MPIAIJ, 2988 MatRestoreRow_MPIAIJ, 2989 MatMult_MPIAIJ, 2990 /* 4*/ MatMultAdd_MPIAIJ, 2991 MatMultTranspose_MPIAIJ, 2992 MatMultTransposeAdd_MPIAIJ, 2993 #ifdef PETSC_HAVE_PBGL 2994 MatSolve_MPIAIJ, 2995 #else 2996 0, 2997 #endif 2998 0, 2999 0, 3000 /*10*/ 0, 3001 0, 3002 0, 3003 MatSOR_MPIAIJ, 3004 MatTranspose_MPIAIJ, 3005 /*15*/ MatGetInfo_MPIAIJ, 3006 MatEqual_MPIAIJ, 3007 MatGetDiagonal_MPIAIJ, 3008 MatDiagonalScale_MPIAIJ, 3009 MatNorm_MPIAIJ, 3010 /*20*/ MatAssemblyBegin_MPIAIJ, 3011 MatAssemblyEnd_MPIAIJ, 3012 MatSetOption_MPIAIJ, 3013 MatZeroEntries_MPIAIJ, 3014 /*24*/ MatZeroRows_MPIAIJ, 3015 0, 3016 #ifdef PETSC_HAVE_PBGL 3017 0, 3018 #else 3019 0, 3020 #endif 3021 0, 3022 0, 3023 /*29*/ MatSetUpPreallocation_MPIAIJ, 3024 #ifdef PETSC_HAVE_PBGL 3025 0, 3026 #else 3027 0, 3028 #endif 3029 0, 3030 0, 3031 0, 3032 /*34*/ MatDuplicate_MPIAIJ, 3033 0, 3034 0, 3035 0, 3036 0, 3037 /*39*/ MatAXPY_MPIAIJ, 3038 MatGetSubMatrices_MPIAIJ, 3039 MatIncreaseOverlap_MPIAIJ, 3040 MatGetValues_MPIAIJ, 3041 MatCopy_MPIAIJ, 3042 /*44*/ MatGetRowMax_MPIAIJ, 3043 MatScale_MPIAIJ, 3044 0, 3045 0, 3046 MatZeroRowsColumns_MPIAIJ, 3047 /*49*/ MatSetBlockSize_MPIAIJ, 3048 0, 3049 0, 3050 0, 3051 0, 3052 /*54*/ MatFDColoringCreate_MPIAIJ, 3053 0, 3054 MatSetUnfactored_MPIAIJ, 3055 MatPermute_MPIAIJ, 3056 0, 3057 /*59*/ MatGetSubMatrix_MPIAIJ, 3058 MatDestroy_MPIAIJ, 3059 MatView_MPIAIJ, 3060 0, 3061 0, 3062 /*64*/ 0, 3063 0, 3064 0, 3065 0, 3066 0, 3067 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3068 MatGetRowMinAbs_MPIAIJ, 3069 0, 3070 MatSetColoring_MPIAIJ, 3071 #if defined(PETSC_HAVE_ADIC) 3072 MatSetValuesAdic_MPIAIJ, 3073 #else 3074 0, 3075 #endif 3076 MatSetValuesAdifor_MPIAIJ, 3077 /*75*/ MatFDColoringApply_AIJ, 3078 0, 3079 0, 3080 0, 3081 0, 3082 /*80*/ 0, 3083 0, 3084 0, 3085 /*83*/ MatLoad_MPIAIJ, 3086 0, 3087 0, 3088 0, 3089 0, 3090 0, 3091 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3092 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3093 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3094 MatPtAP_Basic, 
3095 MatPtAPSymbolic_MPIAIJ, 3096 /*94*/ MatPtAPNumeric_MPIAIJ, 3097 0, 3098 0, 3099 0, 3100 0, 3101 /*99*/ 0, 3102 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3103 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3104 MatConjugate_MPIAIJ, 3105 0, 3106 /*104*/MatSetValuesRow_MPIAIJ, 3107 MatRealPart_MPIAIJ, 3108 MatImaginaryPart_MPIAIJ, 3109 0, 3110 0, 3111 /*109*/0, 3112 MatGetRedundantMatrix_MPIAIJ, 3113 MatGetRowMin_MPIAIJ, 3114 0, 3115 0, 3116 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3117 0, 3118 0, 3119 0, 3120 0, 3121 /*119*/0, 3122 0, 3123 0, 3124 0, 3125 MatGetMultiProcBlock_MPIAIJ, 3126 /*124*/MatFindNonZeroRows_MPIAIJ, 3127 MatGetColumnNorms_MPIAIJ, 3128 MatInvertBlockDiagonal_MPIAIJ, 3129 0, 3130 MatGetSubMatricesParallel_MPIAIJ, 3131 /*129*/0 3132 }; 3133 3134 /* ----------------------------------------------------------------------------------------*/ 3135 3136 EXTERN_C_BEGIN 3137 #undef __FUNCT__ 3138 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3139 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3140 { 3141 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3142 PetscErrorCode ierr; 3143 3144 PetscFunctionBegin; 3145 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3146 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3147 PetscFunctionReturn(0); 3148 } 3149 EXTERN_C_END 3150 3151 EXTERN_C_BEGIN 3152 #undef __FUNCT__ 3153 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3154 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3155 { 3156 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3157 PetscErrorCode ierr; 3158 3159 PetscFunctionBegin; 3160 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3161 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3162 PetscFunctionReturn(0); 3163 } 3164 EXTERN_C_END 3165 3166 EXTERN_C_BEGIN 3167 #undef __FUNCT__ 3168 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3169 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3170 { 3171 Mat_MPIAIJ *b; 3172 PetscErrorCode ierr; 3173 PetscInt i; 3174 3175 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   Preallocates the parallel AIJ matrix: validates the per-row nonzero hints, sets up
   the row/column layouts, and (on first call) creates the two local MATSEQAIJ blocks
   -- b->A for columns owned by this process (the "diagonal" block) and b->B for all
   other columns (the "off-diagonal" block) -- then preallocates each.

   d_nz/d_nnz  - nonzeros per row in the diagonal block (scalar or per-row array)
   o_nz/o_nnz  - nonzeros per row in the off-diagonal block
   PETSC_DEFAULT/PETSC_DECIDE fall back to 5 (diagonal) and 2 (off-diagonal).
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  /* layouts must be finalized before B->rmap->n / B->cmap->n are valid below */
  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* validate the optional per-row hints before any allocation happens */
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    /* the off-diagonal block is logically n x N; unused columns are compressed later */
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Duplicates a parallel AIJ matrix: creates a new Mat of the same type and sizes,
   copies the function table and bookkeeping fields, shares the row/column layouts
   by reference, deep-copies the colmap/garray translation tables, and duplicates
   the local blocks A and B (values copied or not per cpvalues) together with the
   scatter context used for MatMult.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the full operations table so subtype overrides survive the duplicate */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch is not copied; it is rebuilt on demand */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* layouts are immutable once set up, so they are shared by reference */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    /* dense global-column -> local-column translation table */
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    /* garray maps the compressed off-diagonal columns of B back to global indices */
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3292 PetscInt cend,cstart,n,*rowners,sizesset=1; 3293 int fd; 3294 3295 PetscFunctionBegin; 3296 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3297 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3298 if (!rank) { 3299 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3300 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3301 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3302 } 3303 3304 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3305 3306 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3307 M = header[1]; N = header[2]; 3308 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3309 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3310 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3311 3312 /* If global sizes are set, check if they are consistent with that given in the file */ 3313 if (sizesset) { 3314 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3315 } 3316 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3317 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3318 3319 /* determine ownership of all rows */ 3320 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3321 else m = newMat->rmap->n; /* Set by user */ 3322 3323 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3324 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3325 3326 /* First process needs enough room for process with most rows */ 3327 if (!rank) { 3328 mmax = 
rowners[1]; 3329 for (i=2; i<size; i++) { 3330 mmax = PetscMax(mmax,rowners[i]); 3331 } 3332 } else mmax = m; 3333 3334 rowners[0] = 0; 3335 for (i=2; i<=size; i++) { 3336 rowners[i] += rowners[i-1]; 3337 } 3338 rstart = rowners[rank]; 3339 rend = rowners[rank+1]; 3340 3341 /* distribute row lengths to all processors */ 3342 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3343 if (!rank) { 3344 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3345 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3346 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3347 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3348 for (j=0; j<m; j++) { 3349 procsnz[0] += ourlens[j]; 3350 } 3351 for (i=1; i<size; i++) { 3352 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3353 /* calculate the number of nonzeros on each processor */ 3354 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3355 procsnz[i] += rowlengths[j]; 3356 } 3357 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3358 } 3359 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3360 } else { 3361 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3362 } 3363 3364 if (!rank) { 3365 /* determine max buffer needed and allocate it */ 3366 maxnz = 0; 3367 for (i=0; i<size; i++) { 3368 maxnz = PetscMax(maxnz,procsnz[i]); 3369 } 3370 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3371 3372 /* read in my part of the matrix column indices */ 3373 nz = procsnz[0]; 3374 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3375 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3376 3377 /* read in every one elses and ship off */ 3378 for (i=1; i<size; i++) { 3379 nz = procsnz[i]; 3380 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3381 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3382 } 3383 
ierr = PetscFree(cols);CHKERRQ(ierr); 3384 } else { 3385 /* determine buffer space needed for message */ 3386 nz = 0; 3387 for (i=0; i<m; i++) { 3388 nz += ourlens[i]; 3389 } 3390 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3391 3392 /* receive message of column indices*/ 3393 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3394 } 3395 3396 /* determine column ownership if matrix is not square */ 3397 if (N != M) { 3398 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3399 else n = newMat->cmap->n; 3400 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3401 cstart = cend - n; 3402 } else { 3403 cstart = rstart; 3404 cend = rend; 3405 n = cend - cstart; 3406 } 3407 3408 /* loop over local rows, determining number of off diagonal entries */ 3409 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3410 jj = 0; 3411 for (i=0; i<m; i++) { 3412 for (j=0; j<ourlens[i]; j++) { 3413 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3414 jj++; 3415 } 3416 } 3417 3418 for (i=0; i<m; i++) { 3419 ourlens[i] -= offlens[i]; 3420 } 3421 if (!sizesset) { 3422 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3423 } 3424 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3425 3426 for (i=0; i<m; i++) { 3427 ourlens[i] += offlens[i]; 3428 } 3429 3430 if (!rank) { 3431 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3432 3433 /* read in my part of the matrix numerical values */ 3434 nz = procsnz[0]; 3435 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3436 3437 /* insert into matrix */ 3438 jj = rstart; 3439 smycols = mycols; 3440 svals = vals; 3441 for (i=0; i<m; i++) { 3442 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3443 smycols += ourlens[i]; 3444 svals += ourlens[i]; 3445 jj++; 3446 } 3447 3448 /* read in other processors and ship out */ 3449 for (i=1; i<size; i++) { 3450 nz = 
procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix; smycols/svals walk the flat column/value buffers row by row */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Extracts a parallel submatrix given row/column index sets.  Gathers iscol onto
  all processes (or reuses the gathered IS cached on the output matrix when
  call == MAT_REUSE_MATRIX) and delegates to MatGetSubMatrix_MPIAIJ_Private().
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* the gathered column IS was attached to the submatrix on the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* cache the gathered IS on the new matrix so MAT_REUSE_MATRIX can find it */
    ierr =
PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix cached on the previous result */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  =
aij->i;
    jj = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths (dlens and olens share one allocation) */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
3604 */ 3605 M->was_assembled = PETSC_TRUE; 3606 M->assembled = PETSC_FALSE; 3607 } 3608 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3609 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3610 ii = aij->i; 3611 jj = aij->j; 3612 aa = aij->a; 3613 for (i=0; i<m; i++) { 3614 row = rstart + i; 3615 nz = ii[i+1] - ii[i]; 3616 cwork = jj; jj += nz; 3617 vwork = aa; aa += nz; 3618 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3619 } 3620 3621 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3622 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3623 *newmat = M; 3624 3625 /* save submatrix used in processor for next request */ 3626 if (call == MAT_INITIAL_MATRIX) { 3627 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3628 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3629 } 3630 3631 PetscFunctionReturn(0); 3632 } 3633 3634 EXTERN_C_BEGIN 3635 #undef __FUNCT__ 3636 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3637 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3638 { 3639 PetscInt m,cstart, cend,j,nnz,i,d; 3640 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3641 const PetscInt *JJ; 3642 PetscScalar *values; 3643 PetscErrorCode ierr; 3644 3645 PetscFunctionBegin; 3646 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3647 3648 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3649 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3650 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3651 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3652 m = B->rmap->n; 3653 cstart = B->cmap->rstart; 3654 cend = B->cmap->rend; 3655 rstart = B->rmap->rstart; 3656 3657 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3658 3659 #if defined(PETSC_USE_DEBUGGING) 3660 for (i=0; i<m; i++) { 3661 nnz = Ii[i+1]- Ii[i]; 3662 JJ = J + Ii[i]; 
3663 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3664 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3665 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3666 } 3667 #endif 3668 3669 for (i=0; i<m; i++) { 3670 nnz = Ii[i+1]- Ii[i]; 3671 JJ = J + Ii[i]; 3672 nnz_max = PetscMax(nnz_max,nnz); 3673 d = 0; 3674 for (j=0; j<nnz; j++) { 3675 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3676 } 3677 d_nnz[i] = d; 3678 o_nnz[i] = nnz - d; 3679 } 3680 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3681 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3682 3683 if (v) values = (PetscScalar*)v; 3684 else { 3685 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3686 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3687 } 3688 3689 for (i=0; i<m; i++) { 3690 ii = i + rstart; 3691 nnz = Ii[i+1]- Ii[i]; 3692 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3693 } 3694 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3695 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3696 3697 if (!v) { 3698 ierr = PetscFree(values);CHKERRQ(ierr); 3699 } 3700 PetscFunctionReturn(0); 3701 } 3702 EXTERN_C_END 3703 3704 #undef __FUNCT__ 3705 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3706 /*@ 3707 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3708 (the default parallel PETSc format). 3709 3710 Collective on MPI_Comm 3711 3712 Input Parameters: 3713 + B - the matrix 3714 . i - the indices into j for the start of each local row (starts with zero) 3715 . 
j - the column indices for each local row (starts with zero) 3716 - v - optional values in the matrix 3717 3718 Level: developer 3719 3720 Notes: 3721 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3722 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3723 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3724 3725 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3726 3727 The format which is used for the sparse matrix input, is equivalent to a 3728 row-major ordering.. i.e for the following matrix, the input data expected is 3729 as shown: 3730 3731 1 0 0 3732 2 0 3 P0 3733 ------- 3734 4 5 6 P1 3735 3736 Process0 [P0]: rows_owned=[0,1] 3737 i = {0,1,3} [size = nrow+1 = 2+1] 3738 j = {0,0,2} [size = nz = 6] 3739 v = {1,2,3} [size = nz = 6] 3740 3741 Process1 [P1]: rows_owned=[2] 3742 i = {0,3} [size = nrow+1 = 1+1] 3743 j = {0,1,2} [size = nz = 6] 3744 v = {4,5,6} [size = nz = 6] 3745 3746 .keywords: matrix, aij, compressed row, sparse, parallel 3747 3748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3749 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3750 @*/ 3751 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3752 { 3753 PetscErrorCode ierr; 3754 3755 PetscFunctionBegin; 3756 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3757 PetscFunctionReturn(0); 3758 } 3759 3760 #undef __FUNCT__ 3761 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3762 /*@C 3763 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3764 (the default parallel PETSc format). 
For good matrix assembly performance 3765 the user should preallocate the matrix storage by setting the parameters 3766 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3767 performance can be increased by more than a factor of 50. 3768 3769 Collective on MPI_Comm 3770 3771 Input Parameters: 3772 + A - the matrix 3773 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3774 (same value is used for all local rows) 3775 . d_nnz - array containing the number of nonzeros in the various rows of the 3776 DIAGONAL portion of the local submatrix (possibly different for each row) 3777 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3778 The size of this array is equal to the number of local rows, i.e 'm'. 3779 You must leave room for the diagonal entry even if it is zero. 3780 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3781 submatrix (same value is used for all local rows). 3782 - o_nnz - array containing the number of nonzeros in the various rows of the 3783 OFF-DIAGONAL portion of the local submatrix (possibly different for 3784 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3785 structure. The size of this array is equal to the number 3786 of local rows, i.e 'm'. 3787 3788 If the *_nnz parameter is given then the *_nz parameter is ignored 3789 3790 The AIJ format (also called the Yale sparse matrix format or 3791 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3792 storage. The stored row and column indices begin with zero. 3793 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3794 3795 The parallel matrix is partitioned such that the first m0 rows belong to 3796 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3797 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
3798 3799 The DIAGONAL portion of the local submatrix of a processor can be defined 3800 as the submatrix which is obtained by extraction the part corresponding to 3801 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3802 first row that belongs to the processor, r2 is the last row belonging to 3803 the this processor, and c1-c2 is range of indices of the local part of a 3804 vector suitable for applying the matrix to. This is an mxn matrix. In the 3805 common case of a square matrix, the row and column ranges are the same and 3806 the DIAGONAL part is also square. The remaining portion of the local 3807 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3808 3809 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3810 3811 You can call MatGetInfo() to get information on how effective the preallocation was; 3812 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3813 You can also run with the option -info and look for messages with the string 3814 malloc in them to see if additional memory allocation was needed. 3815 3816 Example usage: 3817 3818 Consider the following 8x8 matrix with 34 non-zero values, that is 3819 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3820 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3821 as follows: 3822 3823 .vb 3824 1 2 0 | 0 3 0 | 0 4 3825 Proc0 0 5 6 | 7 0 0 | 8 0 3826 9 0 10 | 11 0 0 | 12 0 3827 ------------------------------------- 3828 13 0 14 | 15 16 17 | 0 0 3829 Proc1 0 18 0 | 19 20 21 | 0 0 3830 0 0 0 | 22 23 0 | 24 0 3831 ------------------------------------- 3832 Proc2 25 26 27 | 0 0 28 | 29 0 3833 30 0 0 | 31 32 33 | 0 34 3834 .ve 3835 3836 This can be represented as a collection of submatrices as: 3837 3838 .vb 3839 A B C 3840 D E F 3841 G H I 3842 .ve 3843 3844 Where the submatrices A,B,C are owned by proc0, D,E,F are 3845 owned by proc1, G,H,I are owned by proc2. 
     The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
     The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
     The 'M','N' parameters are 8,8, and have the same values on all procs.

     The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
     submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
     corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
     Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
     part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
     matrix, and [DF] as another SeqAIJ matrix.

     When d_nz, o_nz parameters are specified, d_nz storage elements are
     allocated for every row of the local diagonal submatrix, and o_nz
     storage locations are allocated for every row of the OFF-DIAGONAL submat.
     One way to choose d_nz and o_nz is to use the max nonzeros per local
     rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
     In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
     We are allocating m*(d_nz+o_nz) storage locations for every proc. This
     translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
     for proc2. i.e we are using 12+15+10=37 storage locations to store
     34 values.

     When d_nnz, o_nnz parameters are specified, the storage is specified
     for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
     In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
     Here the space allocated is sum of all the above values i.e 34, and
     hence pre-allocation is perfect.
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
/* Thin dispatcher: forwards to the type-specific implementation registered as
   "MatMPIAIJSetPreallocation_C" (a no-op for types that do not provide it). */
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering.. i.e for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 6]
        v =  {1,2,3}  [size = nz = 6]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 6]
        v =  {4,5,6}  [size = nz = 6]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4016 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4017 4018 Notes: 4019 If the *_nnz parameter is given then the *_nz parameter is ignored 4020 4021 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4022 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4023 storage requirements for this matrix. 4024 4025 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4026 processor than it must be used on all processors that share the object for 4027 that argument. 4028 4029 The user MUST specify either the local or global matrix dimensions 4030 (possibly both). 4031 4032 The parallel matrix is partitioned across processors such that the 4033 first m0 rows belong to process 0, the next m1 rows belong to 4034 process 1, the next m2 rows belong to process 2 etc.. where 4035 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4036 values corresponding to [m x N] submatrix. 4037 4038 The columns are logically partitioned with the n0 columns belonging 4039 to 0th partition, the next n1 columns belonging to the next 4040 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4041 4042 The DIAGONAL portion of the local submatrix on any given processor 4043 is the submatrix corresponding to the rows and columns m,n 4044 corresponding to the given processor. i.e diagonal matrix on 4045 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4046 etc. The remaining portion of the local submatrix [m x (N-n)] 4047 constitute the OFF-DIAGONAL portion. The example below better 4048 illustrates this concept. 4049 4050 For a square global matrix we define each processor's diagonal portion 4051 to be its local rows and the corresponding columns (a square submatrix); 4052 each processor's off-diagonal portion encompasses the remainder of the 4053 local matrix (a rectangular submatrix). 
4054 4055 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4056 4057 When calling this routine with a single process communicator, a matrix of 4058 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4059 type of communicator, use the construction mechanism: 4060 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4061 4062 By default, this format uses inodes (identical nodes) when possible. 4063 We search for consecutive rows with the same nonzero structure, thereby 4064 reusing matrix information to achieve increased efficiency. 4065 4066 Options Database Keys: 4067 + -mat_no_inode - Do not use inodes 4068 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4069 - -mat_aij_oneindex - Internally use indexing starting at 1 4070 rather than 0. Note that when calling MatSetValues(), 4071 the user still MUST index entries starting at 0! 4072 4073 4074 Example usage: 4075 4076 Consider the following 8x8 matrix with 34 non-zero values, that is 4077 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4078 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4079 as follows: 4080 4081 .vb 4082 1 2 0 | 0 3 0 | 0 4 4083 Proc0 0 5 6 | 7 0 0 | 8 0 4084 9 0 10 | 11 0 0 | 12 0 4085 ------------------------------------- 4086 13 0 14 | 15 16 17 | 0 0 4087 Proc1 0 18 0 | 19 20 21 | 0 0 4088 0 0 0 | 22 23 0 | 24 0 4089 ------------------------------------- 4090 Proc2 25 26 27 | 0 0 28 | 29 0 4091 30 0 0 | 31 32 33 | 0 34 4092 .ve 4093 4094 This can be represented as a collection of submatrices as: 4095 4096 .vb 4097 A B C 4098 D E F 4099 G H I 4100 .ve 4101 4102 Where the submatrices A,B,C are owned by proc0, D,E,F are 4103 owned by proc1, G,H,I are owned by proc2. 4104 4105 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4106 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
     The 'M','N' parameters are 8,8, and have the same values on all procs.

     The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
     submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
     corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
     Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
     part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
     matrix, and [DF] as another SeqAIJ matrix.

     When d_nz, o_nz parameters are specified, d_nz storage elements are
     allocated for every row of the local diagonal submatrix, and o_nz
     storage locations are allocated for every row of the OFF-DIAGONAL submat.
     One way to choose d_nz and o_nz is to use the max nonzeros per local
     rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
     In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
     We are allocating m*(d_nz+o_nz) storage locations for every proc. This
     translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
     for proc2. i.e we are using 12+15+10=37 storage locations to store
     34 values.

     When d_nnz, o_nnz parameters are specified, the storage is specified
     for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
     In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
     Here the space allocated is sum of all the above values i.e 34, and
     hence pre-allocation is perfect.
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* single-process communicators get a SEQAIJ matrix (see Notes above) */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/*
    Returns the diagonal (Ad) and off-diagonal (Ao) sequential blocks of an
  MPIAIJ matrix plus the local-to-global column map (colmap) of Ao.
  The caller does NOT own the returned objects.
*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
    Pushes an ISColoring down onto the diagonal (a->A) and off-diagonal (a->B)
  sequential blocks, translating the color indices to each block's local
  column numbering.  Supports IS_COLORING_GLOBAL and IS_COLORING_GHOSTED.
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr =
ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    /* garray maps B's local columns to global columns, indexing the gathered colors */
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* NOTE(review): uses MPI_COMM_SELF here but PETSC_COMM_SELF in the GHOSTED
       branch below -- presumably equivalent after PetscInitialize; confirm */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr =
ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forwards ADIC-computed values to both the diagonal (A) and off-diagonal (B) sequential blocks */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* Forwards ADIFOR-computed values to both the diagonal (A) and off-diagonal (B) sequential blocks */
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeSymbolic"
/*
   Symbolic phase of MatMerge(): creates and preallocates the parallel MPIAIJ
   matrix *outmat from the row structure of each process's sequential matrix
   inmat.  Values are filled in later by MatMergeNumeric().
*/
PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (n == PETSC_DECIDE){
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* exclusive prefix sum of local row counts gives this process's first global row */
  ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  /* count diagonal/off-diagonal nonzeros per row; inmat is accessed via the
     SeqAIJ fast path, so it is assumed to be (derived from) MATSEQAIJ */
  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeNumeric"
/*
   Numeric phase of MatMerge(): copies the values of each process's sequential
   matrix inmat into the already-preallocated parallel matrix outmat, then
   destroys inmat (ownership of inmat transfers to this routine).
*/
PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;   /* local row i becomes global row rstart+i */
    ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* the caller's sequential matrix is consumed here (see MatMerge() contract) */
  ierr = MatDestroy(&inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    inmat - the input sequential matrices
.
n - number of local columns (or PETSC_DECIDE) 4340 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4341 4342 Output Parameter: 4343 . outmat - the parallel matrix generated 4344 4345 Level: advanced 4346 4347 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4348 4349 @*/ 4350 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4351 { 4352 PetscErrorCode ierr; 4353 4354 PetscFunctionBegin; 4355 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4356 if (scall == MAT_INITIAL_MATRIX){ 4357 ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4358 } 4359 ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4360 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4361 PetscFunctionReturn(0); 4362 } 4363 4364 #undef __FUNCT__ 4365 #define __FUNCT__ "MatFileSplit" 4366 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4367 { 4368 PetscErrorCode ierr; 4369 PetscMPIInt rank; 4370 PetscInt m,N,i,rstart,nnz; 4371 size_t len; 4372 const PetscInt *indx; 4373 PetscViewer out; 4374 char *name; 4375 Mat B; 4376 const PetscScalar *values; 4377 4378 PetscFunctionBegin; 4379 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4380 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4381 /* Should this be the type of the diagonal block of A? 
*/ 4382 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4383 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4384 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4385 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4386 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4387 for (i=0;i<m;i++) { 4388 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4389 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4390 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4391 } 4392 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4393 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4394 4395 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4396 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4397 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4398 sprintf(name,"%s.%d",outfile,rank); 4399 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4400 ierr = PetscFree(name); 4401 ierr = MatView(B,out);CHKERRQ(ierr); 4402 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4403 ierr = MatDestroy(&B);CHKERRQ(ierr); 4404 PetscFunctionReturn(0); 4405 } 4406 4407 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4408 #undef __FUNCT__ 4409 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4410 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4411 { 4412 PetscErrorCode ierr; 4413 Mat_Merge_SeqsToMPI *merge; 4414 PetscContainer container; 4415 4416 PetscFunctionBegin; 4417 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4418 if (container) { 4419 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4420 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4421 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4423 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4425 ierr = 
PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    /* buf_rj[0] holds the single contiguous allocation backing all received j-rows */
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  /* chain to the standard MPIAIJ destroy */
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
/* Numeric phase: requires the "MatMergeSeqsToMPI" container already attached
   by MatMerge_SeqsToMPISymbolic(); exchanges values and sums overlapping rows. */
PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* the symbolic phase stored all communication structure in this container */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* send the values of all rows owned by [proc] in one contiguous message */
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  /* ba_i is a dense accumulator for one row; N bounds any row's nonzero count */
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;     /* global row number */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    aa   = a->a + ai[arow];
    nextaj = 0;
    /* merge sorted aj into the (superset) sorted bj_i */
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *(nextai[k]);
        aa   = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;   /* advance the cursor of message k */
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr =
MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* abuf_r[0] is the single contiguous buffer backing all received value rows */
  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
/*
   Symbolic phase of MatMerge_SeqsToMPI(): determines row ownership, exchanges
   the i/j (sparsity) structure of the off-process rows of each seqmat, and
   creates the preallocated parallel matrix *mpimat.  All buffers needed by the
   numeric phase are stored in a Mat_Merge_SeqsToMPI struct attached to *mpimat
   in the "MatMergeSeqsToMPI" container.
*/
PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;     /* m/M may have been PETSC_DECIDE; use resolved values */
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;       /* this proc's own rows are merged locally, not sent */
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* only rows with at least one nonzero are encoded in the i-structure msg */
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
         [1:nrows]:                 row index (global)
         [nrows+1:2*nrows+1]:       i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr
= MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  /* i/j-structure exchange is done; release the temporary communication buffers */
  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = 0;
  len = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;  /* reused below as max row length */
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  /* flatten the free-space chunks into the final contiguous bj array */
  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;  /* also frees the merge struct below */
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/*
   Driver: symbolic phase on MAT_INITIAL_MATRIX, then numeric phase.
   See the manual page above MatMerge_SeqsToMPINumeric() for the contract.
*/
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
          mlocal rows and n columns.
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row i of the result has all diag (A) plus all off-diag (B) entries of row i */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    /* the three loops below interleave B's columns (global index < cstart),
       then A's columns (shifted by cstart), then B's remaining columns, so
       each output row ends up in ascending global column order */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* sparsity pattern is unchanged; only refresh the values in place */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    /* default: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    /* default: local columns plus the nonzero off-process columns (garray),
       assembled in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    /* MatGetSubMatrices expects an array of matrices on reuse */
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=PETSC_NULL;

  PetscFunctionBegin;
  /* A's column layout must match B's row layout for A*B-style products */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* rows of B to fetch = nonzero columns of local A, in ascending global order:
       off-process columns below the diagonal block, the local block, then the rest */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatGetSubMatrices expects an array of matrices on reuse */
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;  /* hand the created IS back to the caller for reuse */
  }
  if (!colb){
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    startsj_r - similar to startsj for receives
.
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  /* rows of B must align with columns of A */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* reuse the communication pattern of A's mat-vec scatter: the columns of
     A's off-diagonal block are exactly the rows of B we must fetch */
  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  /* without the saved arrays a reuse is impossible; fall back to a fresh build */
  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message: one row length per row to be sent */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth from the received row lengths */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    /* structure unchanged: only the numerical values need to be exchanged */
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 5305 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5306 b_oth->free_a = PETSC_TRUE; 5307 b_oth->free_ij = PETSC_TRUE; 5308 b_oth->nonew = 0; 5309 5310 ierr = PetscFree(bufj);CHKERRQ(ierr); 5311 if (!startsj || !bufa_ptr){ 5312 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5313 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5314 } else { 5315 *startsj = sstartsj; 5316 *startsj_r = rstartsj; 5317 *bufa_ptr = bufa; 5318 } 5319 } 5320 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5321 PetscFunctionReturn(0); 5322 } 5323 5324 #undef __FUNCT__ 5325 #define __FUNCT__ "MatGetCommunicationStructs" 5326 /*@C 5327 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5328 5329 Not Collective 5330 5331 Input Parameters: 5332 . A - The matrix in mpiaij format 5333 5334 Output Parameter: 5335 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5336 . colmap - A map from global column index to local index into lvec 5337 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5338 5339 Level: developer 5340 5341 @*/ 5342 #if defined (PETSC_USE_CTABLE) 5343 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5344 #else 5345 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5346 #endif 5347 { 5348 Mat_MPIAIJ *a; 5349 5350 PetscFunctionBegin; 5351 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5352 PetscValidPointer(lvec, 2); 5353 PetscValidPointer(colmap, 3); 5354 PetscValidPointer(multScatter, 4); 5355 a = (Mat_MPIAIJ *) A->data; 5356 if (lvec) *lvec = a->lvec; 5357 if (colmap) *colmap = a->colmap; 5358 if (multScatter) *multScatter = a->Mvctx; 5359 PetscFunctionReturn(0); 5360 } 5361 5362 EXTERN_C_BEGIN 5363 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5364 extern PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

       n                       p                          p
  (              )       (              )         (                  )
m (      A       )  *  n (       B      )   =   m (         C        )
  (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  /* form C = (B' * A')' using the existing AIJ*Dense product, then transpose
     back into the caller-provided C (MAT_REUSE_MATRIX keeps C's structure) */
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
/* Symbolic stage: creates the (dense) product matrix C with the proper layout;
   the numeric stage is installed in C's ops table. */
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/* Driver: symbolic stage only on MAT_INITIAL_MATRIX, numeric stage always. */
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/* Constructor for the MPIAIJ matrix type: installs the ops table, initializes
   the per-process data structure, and registers the composed-function hooks
   (external factorizations, conversions, preallocation, matmat products). */
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs = 1;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  b->size = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register external direct-solver hooks available in this build */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_aij_mumps",
                                     MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                     "MatGetFactor_mpiaij_pastix",
                                     MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                     "MatConvert_MPIAIJ_MPIAIJPERM",
                                     MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                     "MatConvert_MPIAIJ_MPIAIJCRL",
                                     MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                     "MatConvert_MPIAIJ_MPISBAIJ",
                                     MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                     MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                     MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
The user 5589 must free the arrays once the matrix has been destroyed and not before. 5590 5591 The i and j indices are 0 based 5592 5593 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5594 5595 This sets local rows and cannot be used to set off-processor values. 5596 5597 You cannot later use MatSetValues() to change values in this matrix. 5598 5599 .keywords: matrix, aij, compressed row, sparse, parallel 5600 5601 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5602 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5603 @*/ 5604 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5605 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5606 { 5607 PetscErrorCode ierr; 5608 Mat_MPIAIJ *maij; 5609 5610 PetscFunctionBegin; 5611 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5612 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5613 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5614 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5615 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5616 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5617 maij = (Mat_MPIAIJ*) (*mat)->data; 5618 maij->donotstash = PETSC_TRUE; 5619 (*mat)->preallocated = PETSC_TRUE; 5620 5621 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5622 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5623 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5624 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5625 5626 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5627 ierr = 
MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5628 5629 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5630 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5631 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5632 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5633 5634 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5635 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5636 PetscFunctionReturn(0); 5637 } 5638 5639 /* 5640 Special version for direct calls from Fortran 5641 */ 5642 #include <private/fortranimpl.h> 5643 5644 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5645 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5646 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5647 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5648 #endif 5649 5650 /* Change these macros so can be used in void function */ 5651 #undef CHKERRQ 5652 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5653 #undef SETERRQ2 5654 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5655 #undef SETERRQ 5656 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5657 5658 EXTERN_C_BEGIN 5659 #undef __FUNCT__ 5660 #define __FUNCT__ "matsetvaluesmpiaij_" 5661 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5662 { 5663 Mat mat = *mmat; 5664 PetscInt m = *mm, n = *mn; 5665 InsertMode addv = *maddv; 5666 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5667 PetscScalar value; 5668 PetscErrorCode ierr; 5669 5670 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5671 if (mat->insertmode == NOT_SET_VALUES) { 5672 mat->insertmode = addv; 5673 } 5674 #if defined(PETSC_USE_DEBUG) 5675 else if (mat->insertmode != addv) { 5676 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5677 } 5678 #endif 5679 { 5680 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5681 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5682 PetscBool roworiented = aij->roworiented; 5683 5684 /* Some Variables required in the macro */ 5685 Mat A = aij->A; 5686 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5687 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5688 MatScalar *aa = a->a; 5689 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5690 Mat B = aij->B; 5691 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5692 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5693 MatScalar *ba = b->a; 5694 5695 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5696 PetscInt nonew = a->nonew; 5697 MatScalar *ap1,*ap2; 5698 5699 PetscFunctionBegin; 5700 for (i=0; i<m; i++) { 5701 if (im[i] < 0) continue; 5702 #if defined(PETSC_USE_DEBUG) 5703 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5704 #endif 5705 if (im[i] >= rstart && im[i] < rend) { 5706 row = im[i] - rstart; 5707 lastcol1 = -1; 5708 rp1 = aj + ai[row]; 5709 ap1 = aa + ai[row]; 5710 rmax1 = aimax[row]; 5711 nrow1 = ailen[row]; 5712 low1 = 0; 5713 high1 = nrow1; 5714 lastcol2 = -1; 5715 rp2 = bj + bi[row]; 5716 ap2 = ba + bi[row]; 5717 rmax2 = bimax[row]; 5718 nrow2 = bilen[row]; 5719 low2 = 0; 5720 high2 = nrow2; 5721 5722 for (j=0; j<n; j++) { 5723 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5724 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5725 if (in[j] >= cstart && in[j] < cend){ 5726 col = in[j] - cstart; 5727 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5728 } else if (in[j] < 0) continue; 5729 #if defined(PETSC_USE_DEBUG) 5730 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max 
%D",in[j],mat->cmap->N-1); 5731 #endif 5732 else { 5733 if (mat->was_assembled) { 5734 if (!aij->colmap) { 5735 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5736 } 5737 #if defined (PETSC_USE_CTABLE) 5738 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5739 col--; 5740 #else 5741 col = aij->colmap[in[j]] - 1; 5742 #endif 5743 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5744 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5745 col = in[j]; 5746 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5747 B = aij->B; 5748 b = (Mat_SeqAIJ*)B->data; 5749 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5750 rp2 = bj + bi[row]; 5751 ap2 = ba + bi[row]; 5752 rmax2 = bimax[row]; 5753 nrow2 = bilen[row]; 5754 low2 = 0; 5755 high2 = nrow2; 5756 bm = aij->B->rmap->n; 5757 ba = b->a; 5758 } 5759 } else col = in[j]; 5760 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5761 } 5762 } 5763 } else { 5764 if (!aij->donotstash) { 5765 if (roworiented) { 5766 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5767 } else { 5768 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5769 } 5770 } 5771 } 5772 }} 5773 PetscFunctionReturnVoid(); 5774 } 5775 EXTERN_C_END 5776 5777