#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
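
/*
   Usage sketch (illustrative only, not part of the manual pages above): creating an AIJ matrix so
   that the same code runs with one or with many MPI processes, calling both preallocation routines
   as recommended. The preallocation counts used here (5 nonzeros per row in the diagonal block,
   2 in the off-diagonal block) are assumed values for the sketch; choose numbers appropriate to
   your problem.

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
       then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual
*/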

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = sqrt(norms[i]);
  }
  PetscFunctionReturn(0);
}
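
/*
   Usage sketch (an illustration, not code taken from this file): the routine above is normally
   reached through the public interface MatGetColumnNorms(); every process receives the norms of
   all N global columns.

       PetscReal *norms;
       PetscInt  N;
       ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
       ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
       ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
       ierr = PetscFree(norms);CHKERRQ(ierr);
*/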

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the numbers of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the numbers of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
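
/*
   Usage sketch (an assumption for illustration; this routine is internal to PETSc rather than part
   of the public API): process 0 holds a sequential AIJ matrix gmat and every process requests m
   local rows of the distributed copy.

       Mat A;
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
       ...
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/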

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it, it is not scalable (each process
    has an order-N integer array) but it is fast to access.
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 PetscFunctionBegin; 432 /* code 
only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew = a->nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 506 } else if (in[j] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 509 #endif 510 else { 511 if (mat->was_assembled) { 512 if (!aij->colmap) { 513 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 517 col--; 518 #else 519 col = aij->colmap[in[j]] - 1; 520 #endif 521 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 col = in[j]; 524 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 525 B = aij->B; 526 b = 
(Mat_SeqAIJ*)B->data; 527 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 528 rp2 = bj + bi[row]; 529 ap2 = ba + bi[row]; 530 rmax2 = bimax[row]; 531 nrow2 = bilen[row]; 532 low2 = 0; 533 high2 = nrow2; 534 bm = aij->B->rmap->n; 535 ba = b->a; 536 } 537 } else col = in[j]; 538 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 539 } 540 } 541 } else { 542 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 543 if (!aij->donotstash) { 544 if (roworiented) { 545 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 546 } else { 547 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatGetValues_MPIAIJ" 557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 560 PetscErrorCode ierr; 561 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 562 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 563 564 PetscFunctionBegin; 565 for (i=0; i<m; i++) { 566 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 567 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 568 if (idxm[i] >= rstart && idxm[i] < rend) { 569 row = idxm[i] - rstart; 570 for (j=0; j<n; j++) { 571 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 572 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 573 if (idxn[j] >= cstart && idxn[j] < cend){ 574 col = idxn[j] - cstart; 575 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 576 } else { 577 if (!aij->colmap) { 578 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 579 } 580 #if defined (PETSC_USE_CTABLE) 581 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 582 col --; 583 #else 584 col = aij->colmap[idxn[j]] - 1; 585 #endif 586 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 587 else { 588 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else { 593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (aij->donotstash || mat->nooffprocentries) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 
618 mat->insertmode = addv; /* in case this processor had no cache */ 619 620 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 621 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 622 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 632 PetscErrorCode ierr; 633 PetscMPIInt n; 634 PetscInt i,j,rstart,ncols,flg; 635 PetscInt *row,*col; 636 PetscBool other_disassembled; 637 PetscScalar *val; 638 InsertMode addv = mat->insertmode; 639 640 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 641 PetscFunctionBegin; 642 if (!aij->donotstash && !mat->nooffprocentries) { 643 while (1) { 644 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 645 if (!flg) break; 646 647 for (i=0; i<n;) { 648 /* Now identify the consecutive vals belonging to the same row */ 649 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 650 if (j < n) ncols = j-i; 651 else ncols = n-i; 652 /* Now assemble all these values with a single function call */ 653 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 654 i = j; 655 } 656 } 657 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 658 } 659 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 660 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 661 662 /* determine if any processor has disassembled, if so we must 663 also disassemble ourselfs, in order that we may reassemble. 
*/ 664 /* 665 if nonzero structure of submatrix B cannot change then we know that 666 no processor disassembled thus we can skip this stuff 667 */ 668 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 669 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 670 if (mat->was_assembled && !other_disassembled) { 671 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 672 } 673 } 674 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 675 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 681 682 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 683 aij->rowvalues = 0; 684 685 /* used by MatAXPY() */ 686 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 687 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 688 689 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 690 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else 
{ 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 
839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 876 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 877 { 878 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 879 PetscErrorCode ierr; 880 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 881 PetscInt i,*owners = A->rmap->range; 882 PetscInt *nprocs,j,idx,nsends; 883 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 884 PetscInt *rvalues,count,base,slen,*source; 885 PetscInt *lens,*lrows,*values,m; 886 MPI_Comm comm = ((PetscObject)A)->comm; 887 MPI_Request *send_waits,*recv_waits; 888 MPI_Status recv_status,*send_status; 889 const PetscScalar *xx; 890 PetscScalar *bb,*mask; 891 Vec xmask,lmask; 892 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 893 const PetscInt *aj, *ii,*ridx; 894 PetscScalar *aa; 895 #if defined(PETSC_DEBUG) 896 PetscBool found = PETSC_FALSE; 897 #endif 898 899 PetscFunctionBegin; 900 /* first count number of contributors to each processor */ 901 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 902 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 903 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 904 j = 0; 905 for (i=0; i<N; i++) { 906 if (lastidx > (idx = rows[i])) j = 0; 907 lastidx = idx; 908 for (; j<size; j++) { 909 if (idx >= owners[j] && idx < owners[j+1]) { 910 nprocs[2*j]++; 911 nprocs[2*j+1] = 1; 912 owner[i] = j; 913 #if defined(PETSC_DEBUG) 914 found = PETSC_TRUE; 915 #endif 916 break; 917 } 918 } 919 #if defined(PETSC_DEBUG) 920 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 921 found = PETSC_FALSE; 922 #endif 923 } 924 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 925 926 /* inform other processors of number of messages and max length*/ 927 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 928 929 /* post receives: */ 930 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 931 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 932 for (i=0; i<nrecvs; i++) { 933 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 934 } 935 936 /* do sends: 937 1) starts[i] gives the starting index in svalues for stuff going to 938 the ith processor 939 */ 940 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 941 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 942 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 943 starts[0] = 0; 944 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 945 for (i=0; i<N; i++) { 946 svalues[starts[owner[i]]++] = rows[i]; 947 } 948 949 starts[0] = 0; 950 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 951 count = 0; 952 for (i=0; i<size; i++) { 953 if (nprocs[2*i+1]) { 954 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 955 } 956 } 957 ierr = PetscFree(starts);CHKERRQ(ierr); 958 959 base = owners[rank]; 960 961 /* wait on receives */ 962 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 963 count = nrecvs; slen = 0; 964 while (count) { 965 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 966 /* unpack receives into our local space */ 967 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 968 source[imdex] = recv_status.MPI_SOURCE; 969 lens[imdex] = n; 970 slen += n; 971 count--; 972 } 973 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 974 975 /* move the data into the send scatter */ 976 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 977 count = 0; 978 for (i=0; i<nrecvs; i++) { 979 values = rvalues + i*nmax; 980 for (j=0; j<lens[i]; j++) { 981 lrows[count++] = values[j] - base; 982 } 983 } 984 ierr = PetscFree(rvalues);CHKERRQ(ierr); 985 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 986 ierr = PetscFree(owner);CHKERRQ(ierr); 987 ierr = PetscFree(nprocs);CHKERRQ(ierr); 988 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 989 990 /* zero diagonal part of matrix */ 991 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 992 993 /* handle off diagonal part of matrix */ 994 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 995 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 996 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 997 for (i=0; i<slen; i++) { 998 bb[lrows[i]] = 1; 999 } 1000 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1001 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1003 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1004 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1007 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1008 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1009 1010 /* remove zeroed rows of off diagonal matrix */ 1011 ii = aij->i; 1012 for (i=0; i<slen; i++) { 1013 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1014 } 1015 1016 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1017 if (aij->compressedrow.use){ 1018 m = aij->compressedrow.nrows; 1019 ii = aij->compressedrow.i; 1020 
ridx = aij->compressedrow.rindex; 1021 for (i=0; i<m; i++){ 1022 n = ii[i+1] - ii[i]; 1023 aj = aij->j + ii[i]; 1024 aa = aij->a + ii[i]; 1025 1026 for (j=0; j<n; j++) { 1027 if (PetscAbsScalar(mask[*aj])) { 1028 bb[*ridx] -= *aa*xx[*aj]; 1029 *aa = 0.0; 1030 } 1031 aa++; 1032 aj++; 1033 } 1034 ridx++; 1035 } 1036 } else { /* do not use compressed row format */ 1037 m = l->B->rmap->n; 1038 for (i=0; i<m; i++) { 1039 n = ii[i+1] - ii[i]; 1040 aj = aij->j + ii[i]; 1041 aa = aij->a + ii[i]; 1042 for (j=0; j<n; j++) { 1043 if (PetscAbsScalar(mask[*aj])) { 1044 bb[i] -= *aa*xx[*aj]; 1045 *aa = 0.0; 1046 } 1047 aa++; 1048 aj++; 1049 } 1050 } 1051 } 1052 1053 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1054 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1056 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1057 ierr = PetscFree(lrows);CHKERRQ(ierr); 1058 1059 /* wait on sends */ 1060 if (nsends) { 1061 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1062 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1063 ierr = PetscFree(send_status);CHKERRQ(ierr); 1064 } 1065 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1066 ierr = PetscFree(svalues);CHKERRQ(ierr); 1067 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatMult_MPIAIJ" 1073 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1074 { 1075 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1076 PetscErrorCode ierr; 1077 PetscInt nt; 1078 1079 PetscFunctionBegin; 1080 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1081 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1082 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1083 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1084 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1091 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1092 { 1093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1103 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1111 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1118 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1119 { 1120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1121 PetscErrorCode ierr; 1122 PetscBool merged; 1123 1124 PetscFunctionBegin; 1125 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1126 /* do nondiagonal part */ 1127 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1128 if (!merged) { 1129 /* send it on its way */ 1130 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1131 /* do local part */ 1132 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1133 /* receive remote parts: note this assumes the values are not actually */ 1134 /* added in yy until the next line, */ 1135 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1136 } else { 1137 /* do local part */ 1138 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1139 /* send it on its way */ 1140 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1141 /* values actually were received in the Begin() but we need to call this nop */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } 1144 PetscFunctionReturn(0); 1145 } 1146 1147 EXTERN_C_BEGIN 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1150 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1151 { 1152 MPI_Comm comm; 1153 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1154 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1155 IS Me,Notme; 1156 PetscErrorCode ierr; 1157 PetscInt M,N,first,last,*notme,i; 1158 PetscMPIInt size; 1159 1160 PetscFunctionBegin; 1161 1162 /* Easy test: symmetric diagonal block */ 1163 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1164 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1165 if (!*f) PetscFunctionReturn(0); 1166 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1167 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1168 if (size == 1) PetscFunctionReturn(0); 1169 1170 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1171 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1172 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1173 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1174 for (i=0; i<first; i++) notme[i] = i; 1175 for (i=last; i<M; i++) notme[i-last+first] = i; 1176 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1177 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1178 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1179 Aoff = Aoffs[0]; 1180 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1181 Boff = Boffs[0]; 1182 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1183 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1184 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1185 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1186 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1187 ierr = PetscFree(notme);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 EXTERN_C_END 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1194 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1195 { 1196 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 /* do nondiagonal part */ 1201 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1202 /* send it on its way */ 1203 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1204 /* do local part */ 1205 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1206 /* receive remote parts */ 1207 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 /* 1212 This only works 
correctly for square matrices where the subblock A->A is the 1213 diagonal block 1214 */ 1215 #undef __FUNCT__ 1216 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1217 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1218 { 1219 PetscErrorCode ierr; 1220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1221 1222 PetscFunctionBegin; 1223 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1224 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1225 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatScale_MPIAIJ" 1231 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1232 { 1233 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1234 PetscErrorCode ierr; 1235 1236 PetscFunctionBegin; 1237 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1238 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatDestroy_MPIAIJ" 1244 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1245 { 1246 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 #if defined(PETSC_USE_LOG) 1251 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1252 #endif 1253 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1254 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1255 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1256 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1257 #if defined (PETSC_USE_CTABLE) 1258 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1259 #else 1260 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1261 #endif 1262 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1263 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1264 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1265 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1266 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1267 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1268 1269 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1283 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1286 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1287 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1288 PetscErrorCode ierr; 1289 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1290 int fd; 1291 PetscInt 
nz,header[4],*row_lengths,*range=0,rlen,i; 1292 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1293 PetscScalar *column_values; 1294 PetscInt message_count,flowcontrolcount; 1295 1296 PetscFunctionBegin; 1297 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1298 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1299 nz = A->nz + B->nz; 1300 if (!rank) { 1301 header[0] = MAT_FILE_CLASSID; 1302 header[1] = mat->rmap->N; 1303 header[2] = mat->cmap->N; 1304 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1305 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1306 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1307 /* get largest number of rows any processor has */ 1308 rlen = mat->rmap->n; 1309 range = mat->rmap->range; 1310 for (i=1; i<size; i++) { 1311 rlen = PetscMax(rlen,range[i+1] - range[i]); 1312 } 1313 } else { 1314 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1315 rlen = mat->rmap->n; 1316 } 1317 1318 /* load up the local row counts */ 1319 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1320 for (i=0; i<mat->rmap->n; i++) { 1321 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1322 } 1323 1324 /* store the row lengths to the file */ 1325 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1326 if (!rank) { 1327 MPI_Status status; 1328 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1329 for (i=1; i<size; i++) { 1330 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1331 rlen = range[i+1] - range[i]; 1332 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1333 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1334 } 1335 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1336 } else { 1337 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1338 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1339 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1340 } 1341 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1342 1343 /* load up the local column indices */ 1344 nzmax = nz; /* )th processor needs space a largest processor needs */ 1345 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1346 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1347 cnt = 0; 1348 for (i=0; i<mat->rmap->n; i++) { 1349 for (j=B->i[i]; j<B->i[i+1]; j++) { 1350 if ( (col = garray[B->j[j]]) > cstart) break; 1351 column_indices[cnt++] = col; 1352 } 1353 for (k=A->i[i]; k<A->i[i+1]; k++) { 1354 column_indices[cnt++] = A->j[k] + cstart; 1355 } 1356 for (; j<B->i[i+1]; j++) { 1357 column_indices[cnt++] = garray[B->j[j]]; 1358 } 1359 } 1360 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1361 1362 /* store the column indices to the file */ 1363 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1364 if (!rank) { 1365 MPI_Status status; 1366 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1367 for (i=1; i<size; i++) { 1368 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1369 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1370 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1371 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1372 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1373 } 1374 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1375 } else { 1376 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1377 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1378 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1380 } 1381 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1382 1383 /* load up the local column values */ 1384 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1385 cnt = 0; 1386 for (i=0; i<mat->rmap->n; i++) { 1387 for (j=B->i[i]; j<B->i[i+1]; j++) { 1388 if ( garray[B->j[j]] > cstart) break; 1389 column_values[cnt++] = B->a[j]; 1390 } 1391 for (k=A->i[i]; k<A->i[i+1]; k++) { 1392 column_values[cnt++] = A->a[k]; 1393 } 1394 for (; j<B->i[i+1]; j++) { 1395 column_values[cnt++] = B->a[j]; 1396 } 1397 } 1398 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1399 1400 /* store the column values to the file */ 1401 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1402 if (!rank) { 1403 MPI_Status status; 1404 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1405 for (i=1; i<size; i++) { 1406 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1407 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1408 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1409 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1410 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1411 } 1412 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1413 } else { 1414 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1415 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1416 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1418 } 1419 ierr = PetscFree(column_values);CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1425 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1426 { 1427 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1428 PetscErrorCode ierr; 1429 PetscMPIInt rank = aij->rank,size = aij->size; 1430 PetscBool isdraw,iascii,isbinary; 1431 PetscViewer sviewer; 1432 PetscViewerFormat format; 1433 1434 PetscFunctionBegin; 1435 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1436 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1437 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1438 if (iascii) { 1439 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1440 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1441 MatInfo info; 1442 PetscBool inodes; 1443 1444 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1445 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1446 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1447 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1448 if (!inodes) { 1449 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1450 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1451 } else { 1452 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1453 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1454 } 1455 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1456 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1457 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1458 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1459 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1462 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1465 PetscInt inodecount,inodelimit,*inodes; 1466 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1467 if (inodes) { 1468 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1469 } else { 1470 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1471 } 1472 PetscFunctionReturn(0); 1473 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1474 PetscFunctionReturn(0); 1475 } 1476 } else if (isbinary) { 1477 if (size == 1) { 1478 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1479 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1480 } else { 1481 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1482 } 1483 PetscFunctionReturn(0); 1484 } else if (isdraw) { 1485 PetscDraw draw; 1486 PetscBool isnull; 1487 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1488 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1489 } 1490 1491 if (size == 1) { 1492 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1493 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1494 } else { 1495 /* assemble the entire matrix onto first processor. 
*/ 1496 Mat A; 1497 Mat_SeqAIJ *Aloc; 1498 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1499 MatScalar *a; 1500 1501 if (mat->rmap->N > 1024) { 1502 PetscBool flg = PETSC_FALSE; 1503 1504 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1505 if (!flg) { 1506 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1507 } 1508 } 1509 1510 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1511 if (!rank) { 1512 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1513 } else { 1514 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1515 } 1516 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1517 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1518 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1519 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1520 1521 /* copy over the A part */ 1522 Aloc = (Mat_SeqAIJ*)aij->A->data; 1523 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1524 row = mat->rmap->rstart; 1525 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1526 for (i=0; i<m; i++) { 1527 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1528 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1529 } 1530 aj = Aloc->j; 1531 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1532 1533 /* copy over the B part */ 1534 Aloc = (Mat_SeqAIJ*)aij->B->data; 1535 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1536 row = mat->rmap->rstart; 1537 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1538 ct = cols; 1539 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1540 for (i=0; i<m; i++) { 1541 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1542 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1543 } 1544 ierr = PetscFree(ct);CHKERRQ(ierr); 1545 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1546 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1547 /* 1548 Everyone has to call to draw the matrix since the graphics waits are 1549 synchronized across all processors that share the PetscDraw object 1550 */ 1551 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1552 if (!rank) { 1553 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1554 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1555 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1556 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1557 } 1558 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1559 ierr = MatDestroy(&A);CHKERRQ(ierr); 1560 } 1561 PetscFunctionReturn(0); 1562 } 1563 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MatView_MPIAIJ" 1566 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1567 { 1568 PetscErrorCode ierr; 1569 PetscBool iascii,isdraw,issocket,isbinary; 1570 1571 PetscFunctionBegin; 1572 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1573 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1574 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1575 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1576 if (iascii || isdraw || isbinary || issocket) { 1577 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1578 } else { 1579 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatSOR_MPIAIJ" 1586 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1587 { 1588 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1589 PetscErrorCode ierr; 1590 Vec bb1 = 0; 1591 PetscBool hasop; 1592 1593 PetscFunctionBegin; 1594 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1595 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1596 } 1597 1598 if (flag == SOR_APPLY_UPPER) { 1599 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1600 PetscFunctionReturn(0); 1601 } 1602 1603 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1604 if (flag & SOR_ZERO_INITIAL_GUESS) { 1605 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1606 its--; 1607 } 1608 1609 while (its--) { 1610 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1611 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 1613 /* update rhs: bb1 = bb - B*x */ 1614 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1615 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1616 1617 /* local sweep */ 1618 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1619 } 1620 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1621 if (flag & SOR_ZERO_INITIAL_GUESS) { 1622 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1623 its--; 1624 } 1625 while (its--) { 1626 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1627 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 1629 /* update rhs: bb1 = bb - B*x */ 1630 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1631 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1632 1633 /* local sweep */ 1634 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1635 } 1636 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1637 if (flag & SOR_ZERO_INITIAL_GUESS) { 1638 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1639 its--; 1640 } 1641 while (its--) { 1642 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 1645 /* update rhs: bb1 = bb - B*x */ 1646 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1647 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1648 1649 /* local sweep */ 1650 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1651 } 1652 } else if (flag & SOR_EISENSTAT) { 1653 Vec xx1; 1654 1655 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1657 1658 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1659 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 if (!mat->diag) { 1661 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1662 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1663 } 1664 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1665 if (hasop) { 1666 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1667 } else { 1668 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1669 } 1670 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1671 1672 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1673 1674 /* local sweep */ 1675 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1676 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1677 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1678 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1679 1680 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatPermute_MPIAIJ" 1686 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1687 { 1688 MPI_Comm comm,pcomm; 1689 PetscInt first,local_size,nrows; 1690 const PetscInt *rows; 1691 PetscMPIInt size; 1692 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1693 PetscErrorCode ierr; 1694 1695 PetscFunctionBegin; 1696 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1697 /* make a collective version of 'rowp' */ 1698 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1699 if (pcomm==comm) { 1700 crowp = rowp; 1701 } else { 1702 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1703 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1704 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1705 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1706 } 1707 /* collect the global row permutation and invert it */ 1708 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1709 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1710 if (pcomm!=comm) { 1711 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1712 } 1713 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1714 /* get the local target indices */ 1715 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1716 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1717 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1718 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1719 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1720 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1721 /* the column permutation is so much easier; 1722 make a local version of 'colp' and invert it */ 1723 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1724 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1725 if (size==1) { 1726 lcolp = colp; 1727 } else { 1728 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1729 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1730 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1731 } 1732 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1733 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1734 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1735 if (size>1) { 1736 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1737 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1738 } 1739 /* now we just get the submatrix */ 1740 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1741 /* clean up */ 1742 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1743 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1744 PetscFunctionReturn(0); 1745 } 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1749 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1750 { 1751 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1752 Mat A = mat->A,B = mat->B; 1753 PetscErrorCode ierr; 1754 PetscReal isend[5],irecv[5]; 1755 1756 PetscFunctionBegin; 1757 info->block_size = 1.0; 1758 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1759 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1760 isend[3] = info->memory; isend[4] = info->mallocs; 1761 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1762 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1763 isend[3] += info->memory; isend[4] += info->mallocs; 1764 if (flag == MAT_LOCAL) { 1765 info->nz_used = isend[0]; 1766 info->nz_allocated = isend[1]; 1767 info->nz_unneeded = isend[2]; 1768 info->memory = isend[3]; 1769 info->mallocs = isend[4]; 1770 } else if (flag == MAT_GLOBAL_MAX) { 1771 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1779 info->nz_used = irecv[0]; 1780 info->nz_allocated = irecv[1]; 1781 info->nz_unneeded = irecv[2]; 1782 info->memory = irecv[3]; 1783 info->mallocs = irecv[4]; 1784 } 1785 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1786 info->fill_ratio_needed = 0; 1787 info->factor_mallocs = 0; 1788 1789 PetscFunctionReturn(0); 1790 } 1791 1792 #undef __FUNCT__ 1793 #define __FUNCT__ "MatSetOption_MPIAIJ" 1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1795 { 1796 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 switch (op) { 1801 case MAT_NEW_NONZERO_LOCATIONS: 1802 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1803 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1804 case MAT_KEEP_NONZERO_PATTERN: 1805 case MAT_NEW_NONZERO_LOCATION_ERR: 1806 case MAT_USE_INODES: 1807 case MAT_IGNORE_ZERO_ENTRIES: 1808 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1809 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1810 break; 1811 case MAT_ROW_ORIENTED: 1812 a->roworiented = flg; 1813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1814 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1815 break; 1816 case MAT_NEW_DIAGONALS: 1817 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1818 break; 1819 case MAT_IGNORE_OFF_PROC_ENTRIES: 1820 a->donotstash = PETSC_TRUE; 1821 break; 1822 case MAT_SPD: 1823 A->spd_set = PETSC_TRUE; 1824 A->spd = flg; 1825 if (flg) { 1826 A->symmetric = PETSC_TRUE; 1827 A->structurally_symmetric = PETSC_TRUE; 1828 A->symmetric_set = PETSC_TRUE; 1829 A->structurally_symmetric_set = PETSC_TRUE; 1830 } 1831 break; 1832 case MAT_SYMMETRIC: 1833 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1834 break; 1835 case 
MAT_STRUCTURALLY_SYMMETRIC: 1836 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1837 break; 1838 case MAT_HERMITIAN: 1839 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1840 break; 1841 case MAT_SYMMETRY_ETERNAL: 1842 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1843 break; 1844 default: 1845 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1846 } 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatGetRow_MPIAIJ" 1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1853 { 1854 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1855 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1856 PetscErrorCode ierr; 1857 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1858 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1859 PetscInt *cmap,*idx_p; 1860 1861 PetscFunctionBegin; 1862 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1863 mat->getrowactive = PETSC_TRUE; 1864 1865 if (!mat->rowvalues && (idx || v)) { 1866 /* 1867 allocate enough space to hold information from the longest row. 1868 */ 1869 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1870 PetscInt max = 1,tmp; 1871 for (i=0; i<matin->rmap->n; i++) { 1872 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1873 if (max < tmp) { max = tmp; } 1874 } 1875 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1876 } 1877 1878 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1879 lrow = row - rstart; 1880 1881 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1882 if (!v) {pvA = 0; pvB = 0;} 1883 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1884 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1885 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1886 nztot = nzA + nzB; 1887 1888 cmap = mat->garray; 1889 if (v || idx) { 1890 if (nztot) { 1891 /* Sort by increasing column numbers, assuming A and B already sorted */ 1892 PetscInt imark = -1; 1893 if (v) { 1894 *v = v_p = mat->rowvalues; 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1897 else break; 1898 } 1899 imark = i; 1900 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1901 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1902 } 1903 if (idx) { 1904 *idx = idx_p = mat->rowindices; 1905 if (imark > -1) { 1906 for (i=0; i<imark; i++) { 1907 idx_p[i] = cmap[cworkB[i]]; 1908 } 1909 } else { 1910 for (i=0; i<nzB; i++) { 1911 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1912 else break; 1913 } 1914 imark = i; 1915 } 1916 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1917 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1918 } 1919 } else { 1920 if (idx) *idx = 0; 1921 if (v) *v = 0; 1922 } 1923 } 1924 *nz = nztot; 1925 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1926 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1932 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1933 { 1934 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1935 1936 PetscFunctionBegin; 1937 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must 
be called first"); 1938 aij->getrowactive = PETSC_FALSE; 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatNorm_MPIAIJ" 1944 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1945 { 1946 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1947 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1948 PetscErrorCode ierr; 1949 PetscInt i,j,cstart = mat->cmap->rstart; 1950 PetscReal sum = 0.0; 1951 MatScalar *v; 1952 1953 PetscFunctionBegin; 1954 if (aij->size == 1) { 1955 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1956 } else { 1957 if (type == NORM_FROBENIUS) { 1958 v = amat->a; 1959 for (i=0; i<amat->nz; i++) { 1960 #if defined(PETSC_USE_COMPLEX) 1961 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1962 #else 1963 sum += (*v)*(*v); v++; 1964 #endif 1965 } 1966 v = bmat->a; 1967 for (i=0; i<bmat->nz; i++) { 1968 #if defined(PETSC_USE_COMPLEX) 1969 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1970 #else 1971 sum += (*v)*(*v); v++; 1972 #endif 1973 } 1974 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1975 *norm = sqrt(*norm); 1976 } else if (type == NORM_1) { /* max column norm */ 1977 PetscReal *tmp,*tmp2; 1978 PetscInt *jj,*garray = aij->garray; 1979 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1980 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1981 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1982 *norm = 0.0; 1983 v = amat->a; jj = amat->j; 1984 for (j=0; j<amat->nz; j++) { 1985 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1986 } 1987 v = bmat->a; jj = bmat->j; 1988 for (j=0; j<bmat->nz; j++) { 1989 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1990 } 1991 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1992 for (j=0; j<mat->cmap->N; j++) { 1993 if (tmp2[j] > *norm) *norm = tmp2[j]; 1994 } 1995 ierr = PetscFree(tmp);CHKERRQ(ierr); 1996 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1997 } else if (type == NORM_INFINITY) { /* max row norm */ 1998 PetscReal ntemp = 0.0; 1999 for (j=0; j<aij->A->rmap->n; j++) { 2000 v = amat->a + amat->i[j]; 2001 sum = 0.0; 2002 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2003 sum += PetscAbsScalar(*v); v++; 2004 } 2005 v = bmat->a + bmat->i[j]; 2006 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2007 sum += PetscAbsScalar(*v); v++; 2008 } 2009 if (sum > ntemp) ntemp = sum; 2010 } 2011 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2012 } else { 2013 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2014 } 2015 } 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatTranspose_MPIAIJ" 2021 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2022 { 2023 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2024 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2025 PetscErrorCode ierr; 2026 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2027 PetscInt cstart=A->cmap->rstart,ncol; 2028 Mat B; 2029 MatScalar *array; 2030 2031 PetscFunctionBegin; 2032 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2033 2034 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2035 ai = Aloc->i; aj = Aloc->j; 2036 bi = Bloc->i; bj = Bloc->j; 2037 if 
(reuse == MAT_INITIAL_MATRIX || *matout == A) { 2038 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2039 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2040 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2041 for (i=0; i<ai[ma]; i++){ 2042 d_nnz[aj[i]] ++; 2043 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2044 } 2045 2046 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2047 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2048 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2049 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2050 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2051 } else { 2052 B = *matout; 2053 } 2054 2055 /* copy over the A part */ 2056 array = Aloc->a; 2057 row = A->rmap->rstart; 2058 for (i=0; i<ma; i++) { 2059 ncol = ai[i+1]-ai[i]; 2060 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2061 row++; array += ncol; aj += ncol; 2062 } 2063 aj = Aloc->j; 2064 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2065 2066 /* copy over the B part */ 2067 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2068 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2069 array = Bloc->a; 2070 row = A->rmap->rstart; 2071 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2072 cols_tmp = cols; 2073 for (i=0; i<mb; i++) { 2074 ncol = bi[i+1]-bi[i]; 2075 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2076 row++; array += ncol; cols_tmp += ncol; 2077 } 2078 ierr = PetscFree(cols);CHKERRQ(ierr); 2079 2080 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2083 *matout = B; 2084 } else { 2085 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2086 } 2087 PetscFunctionReturn(0); 2088 } 2089 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2092 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2093 { 2094 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2095 Mat a = aij->A,b = aij->B; 2096 PetscErrorCode ierr; 2097 PetscInt s1,s2,s3; 2098 2099 PetscFunctionBegin; 2100 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2101 if (rr) { 2102 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2103 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2104 /* Overlap communication with computation. 
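        The scatter of rr into aij->lvec is only started here; the left scaling of the
        off-diagonal block and the scaling of the diagonal block proceed in the meantime,
        and VecScatterEnd() below collects the off-process entries of rr just before the
        off-diagonal block is column scaled with them.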
*/ 2105 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2106 } 2107 if (ll) { 2108 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2109 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2110 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2111 } 2112 /* scale the diagonal block */ 2113 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2114 2115 if (rr) { 2116 /* Do a scatter end and then right scale the off-diagonal block */ 2117 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2119 } 2120 2121 PetscFunctionReturn(0); 2122 } 2123 2124 #undef __FUNCT__ 2125 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2126 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2127 { 2128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2129 PetscErrorCode ierr; 2130 2131 PetscFunctionBegin; 2132 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2133 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2134 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2135 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2136 PetscFunctionReturn(0); 2137 } 2138 #undef __FUNCT__ 2139 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2140 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2141 { 2142 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2143 PetscErrorCode ierr; 2144 2145 PetscFunctionBegin; 2146 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatEqual_MPIAIJ" 2152 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2153 { 2154 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2155 Mat a,b,c,d; 2156 PetscBool flg; 2157 PetscErrorCode ierr; 2158 2159 PetscFunctionBegin; 2160 a = matA->A; b = matA->B; 2161 c = matB->A; d = matB->B; 2162 2163 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2164 if (flg) { 2165 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2166 } 2167 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2168 PetscFunctionReturn(0); 2169 } 2170 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatCopy_MPIAIJ" 2173 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2174 { 2175 PetscErrorCode ierr; 2176 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2177 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2178 2179 PetscFunctionBegin; 2180 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2181 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2182 /* because of the column compression in the off-processor part of the matrix a->B, 2183 the number of columns in a->B and b->B may be different, hence we cannot call 2184 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2185 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2186 then copying the submatrices */ 2187 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2188 } else { 2189 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2190 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2191 } 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2197 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2198 { 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "MatAXPY_MPIAIJ" 2208 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2209 { 2210 PetscErrorCode ierr; 2211 PetscInt i; 2212 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2213 PetscBLASInt bnz,one=1; 2214 Mat_SeqAIJ *x,*y; 2215 2216 PetscFunctionBegin; 2217 if (str == SAME_NONZERO_PATTERN) { 2218 PetscScalar alpha = a; 2219 x = (Mat_SeqAIJ *)xx->A->data; 2220 y = (Mat_SeqAIJ *)yy->A->data; 2221 bnz = PetscBLASIntCast(x->nz); 2222 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2223 x = (Mat_SeqAIJ *)xx->B->data; 2224 y = (Mat_SeqAIJ *)yy->B->data; 2225 bnz = PetscBLASIntCast(x->nz); 2226 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2227 } else if (str == SUBSET_NONZERO_PATTERN) { 2228 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2229 2230 x = (Mat_SeqAIJ *)xx->B->data; 2231 y = (Mat_SeqAIJ *)yy->B->data; 2232 if (y->xtoy && y->XtoY != xx->B) { 2233 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2234 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2235 } 2236 if (!y->xtoy) { /* get xtoy */ 2237 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2238 y->XtoY = xx->B; 2239 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2240 } 2241 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2242 } else { 2243 Mat B; 2244 PetscInt *nnz_d,*nnz_o; 2245 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2246 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2247 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2248 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2249 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2250 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2251 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2253 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2254 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2255 ierr = MatHeaderReplace(Y,B); 2256 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatConjugate_MPIAIJ" 2266 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2267 { 2268 #if defined(PETSC_USE_COMPLEX) 2269 PetscErrorCode ierr; 2270 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2271 2272 PetscFunctionBegin; 2273 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2274 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2275 #else 2276 PetscFunctionBegin; 2277 #endif 2278 PetscFunctionReturn(0); 
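  /* in a real-scalar build conjugation is a no-op, so only the complex branch above touches the local blocks */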
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat
B,Mat A, const MatFactorInfo *info) 2371 { 2372 PetscFunctionBegin; 2373 PetscFunctionReturn(0); 2374 } 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatSolve_MPIAIJ" 2378 /* 2379 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2380 */ 2381 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2382 { 2383 namespace graph_dist = boost::graph::distributed; 2384 2385 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2386 lgraph_type* lgraph_p; 2387 PetscContainer c; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2392 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2393 ierr = VecCopy(b, x);CHKERRQ(ierr); 2394 2395 PetscScalar* array_x; 2396 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2397 PetscInt sx; 2398 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2399 2400 PetscScalar* array_b; 2401 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2402 PetscInt sb; 2403 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2404 2405 lgraph_type& level_graph = *lgraph_p; 2406 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2407 2408 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2409 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2410 ref_x(array_x, boost::extents[num_vertices(graph)]); 2411 2412 typedef boost::iterator_property_map<array_ref_type::iterator, 2413 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2414 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2415 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2416 2417 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2418 2419 PetscFunctionReturn(0); 2420 } 2421 #endif 2422 2423 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2424 PetscInt nzlocal,nsends,nrecvs; 2425 PetscMPIInt *send_rank,*recv_rank; 2426 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2427 PetscScalar *sbuf_a,**rbuf_a; 2428 PetscErrorCode (*Destroy)(Mat); 2429 } Mat_Redundant; 2430 2431 #undef __FUNCT__ 2432 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2433 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2434 { 2435 PetscErrorCode ierr; 2436 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2437 PetscInt i; 2438 2439 PetscFunctionBegin; 2440 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2441 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2442 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2443 for (i=0; i<redund->nrecvs; i++){ 2444 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2445 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2446 } 2447 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2448 ierr = PetscFree(redund);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatDestroy_MatRedundant" 2454 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2455 { 2456 PetscErrorCode ierr; 2457 PetscContainer container; 2458 Mat_Redundant *redund=PETSC_NULL; 2459 2460 PetscFunctionBegin; 2461 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2462 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2463 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2464 A->ops->destroy = redund->Destroy; 2465 ierr = 
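  /* detach the container so its user destroy routine (PetscContainerDestroy_MatRedundant())
     can free the stashed communication buffers; the original destroy routine restored above
     is then invoked */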
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2466 if (A->ops->destroy) { 2467 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2468 } 2469 PetscFunctionReturn(0); 2470 } 2471 2472 #undef __FUNCT__ 2473 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2474 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2475 { 2476 PetscMPIInt rank,size; 2477 MPI_Comm comm=((PetscObject)mat)->comm; 2478 PetscErrorCode ierr; 2479 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2480 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2481 PetscInt *rowrange=mat->rmap->range; 2482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2483 Mat A=aij->A,B=aij->B,C=*matredundant; 2484 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2485 PetscScalar *sbuf_a; 2486 PetscInt nzlocal=a->nz+b->nz; 2487 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2488 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2489 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2490 MatScalar *aworkA,*aworkB; 2491 PetscScalar *vals; 2492 PetscMPIInt tag1,tag2,tag3,imdex; 2493 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2494 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2495 MPI_Status recv_status,*send_status; 2496 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2497 PetscInt **rbuf_j=PETSC_NULL; 2498 PetscScalar **rbuf_a=PETSC_NULL; 2499 Mat_Redundant *redund=PETSC_NULL; 2500 PetscContainer container; 2501 2502 PetscFunctionBegin; 2503 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2504 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2505 2506 if (reuse == MAT_REUSE_MATRIX) { 2507 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2508 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2509 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2510 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2511 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2512 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2513 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2514 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2515 2516 nsends = redund->nsends; 2517 nrecvs = redund->nrecvs; 2518 send_rank = redund->send_rank; 2519 recv_rank = redund->recv_rank; 2520 sbuf_nz = redund->sbuf_nz; 2521 rbuf_nz = redund->rbuf_nz; 2522 sbuf_j = redund->sbuf_j; 2523 sbuf_a = redund->sbuf_a; 2524 rbuf_j = redund->rbuf_j; 2525 rbuf_a = redund->rbuf_a; 2526 } 2527 2528 if (reuse == MAT_INITIAL_MATRIX){ 2529 PetscMPIInt subrank,subsize; 2530 PetscInt nleftover,np_subcomm; 2531 /* get the destination processors' id send_rank, nsends and nrecvs */ 2532 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2533 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2534 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2535 np_subcomm = size/nsubcomm; 2536 nleftover = size - nsubcomm*np_subcomm; 2537 nsends = 0; nrecvs = 0; 2538 for (i=0; i<size; i++){ /* i=rank*/ 2539 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2540 send_rank[nsends] = i; nsends++; 2541 recv_rank[nrecvs++] = i; 2542 } 2543 } 2544 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2545 i = size-nleftover-1; 2546 j = 0; 2547 while (j < nsubcomm - nleftover){ 2548 send_rank[nsends++] = i; 2549 i--; j++; 2550 } 2551 } 2552 2553 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2554 for (i=0; i<nleftover; i++){ 2555 recv_rank[nrecvs++] = size-nleftover+i; 2556 } 2557 } 2558 2559 /* allocate sbuf_j, sbuf_a */ 2560 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2561 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2562 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2563 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2564 2565 /* copy mat's local entries into the buffers */ 2566 if (reuse == MAT_INITIAL_MATRIX){ 2567 rownz_max = 0; 2568 rptr = sbuf_j; 2569 cols = sbuf_j + rend-rstart + 1; 2570 vals = sbuf_a; 2571 rptr[0] = 0; 2572 for (i=0; i<rend-rstart; i++){ 2573 row = i + rstart; 2574 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2575 ncols = nzA + nzB; 2576 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2577 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2578 /* load the column indices for this row into cols */ 2579 lwrite = 0; 2580 for (l=0; l<nzB; l++) { 2581 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2582 vals[lwrite] = aworkB[l]; 2583 cols[lwrite++] = ctmp; 2584 } 2585 } 2586 for (l=0; l<nzA; l++){ 2587 vals[lwrite] = aworkA[l]; 2588 cols[lwrite++] = cstart + cworkA[l]; 2589 } 2590 for (l=0; l<nzB; l++) { 2591 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2592 vals[lwrite] = aworkB[l]; 2593 cols[lwrite++] = ctmp; 2594 } 2595 } 2596 vals += ncols; 2597 cols += ncols; 2598 rptr[i+1] = rptr[i] + ncols; 2599 if (rownz_max < ncols) rownz_max = ncols; 2600 } 2601 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2602 } else { /* only copy matrix values into sbuf_a */ 2603 rptr = sbuf_j; 2604 vals = sbuf_a; 2605 rptr[0] = 0; 2606 for (i=0; i<rend-rstart; i++){ 2607 row = i + rstart; 2608 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2609 ncols = nzA + nzB; 2610 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2611 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2612 lwrite = 0; 2613 for (l=0; l<nzB; l++) { 2614 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2615 } 2616 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2617 for 
(l=0; l<nzB; l++) { 2618 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2619 } 2620 vals += ncols; 2621 rptr[i+1] = rptr[i] + ncols; 2622 } 2623 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2624 2625 /* send nzlocal to others, and recv other's nzlocal */ 2626 /*--------------------------------------------------*/ 2627 if (reuse == MAT_INITIAL_MATRIX){ 2628 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2629 s_waits2 = s_waits3 + nsends; 2630 s_waits1 = s_waits2 + nsends; 2631 r_waits1 = s_waits1 + nsends; 2632 r_waits2 = r_waits1 + nrecvs; 2633 r_waits3 = r_waits2 + nrecvs; 2634 } else { 2635 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2636 r_waits3 = s_waits3 + nsends; 2637 } 2638 2639 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2640 if (reuse == MAT_INITIAL_MATRIX){ 2641 /* get new tags to keep the communication clean */ 2642 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2644 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2645 2646 /* post receives of other's nzlocal */ 2647 for (i=0; i<nrecvs; i++){ 2648 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2649 } 2650 /* send nzlocal to others */ 2651 for (i=0; i<nsends; i++){ 2652 sbuf_nz[i] = nzlocal; 2653 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2654 } 2655 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2656 count = nrecvs; 2657 while (count) { 2658 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2659 recv_rank[imdex] = recv_status.MPI_SOURCE; 2660 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2661 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2662 2663 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2664 rbuf_nz[imdex] += i + 2; 2665 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2666 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2667 count--; 2668 } 2669 /* wait on sends of nzlocal */ 2670 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2671 /* send mat->i,j to others, and recv from other's */ 2672 /*------------------------------------------------*/ 2673 for (i=0; i<nsends; i++){ 2674 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2675 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2676 } 2677 /* wait on receives of mat->i,j */ 2678 /*------------------------------*/ 2679 count = nrecvs; 2680 while (count) { 2681 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2682 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2683 count--; 2684 } 2685 /* wait on sends of mat->i,j */ 2686 /*---------------------------*/ 2687 if (nsends) { 2688 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2689 } 2690 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2691 2692 /* post receives, send and receive mat->a */ 2693 
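/* Only the numerical values travel in this third round (tag3); the buffer sizes were
   exchanged with tag1 and the packed row pointers/column indices with tag2, and on
   MAT_REUSE_MATRIX those first two rounds are skipped because the pattern is kept in
   the attached Mat_Redundant structure. */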
/*----------------------------------------*/ 2694 for (imdex=0; imdex<nrecvs; imdex++) { 2695 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2696 } 2697 for (i=0; i<nsends; i++){ 2698 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2699 } 2700 count = nrecvs; 2701 while (count) { 2702 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2703 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2704 count--; 2705 } 2706 if (nsends) { 2707 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2708 } 2709 2710 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2711 2712 /* create redundant matrix */ 2713 /*-------------------------*/ 2714 if (reuse == MAT_INITIAL_MATRIX){ 2715 /* compute rownz_max for preallocation */ 2716 for (imdex=0; imdex<nrecvs; imdex++){ 2717 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2718 rptr = rbuf_j[imdex]; 2719 for (i=0; i<j; i++){ 2720 ncols = rptr[i+1] - rptr[i]; 2721 if (rownz_max < ncols) rownz_max = ncols; 2722 } 2723 } 2724 2725 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2726 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2727 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2728 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2729 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2730 } else { 2731 C = *matredundant; 2732 } 2733 2734 /* insert local matrix entries */ 2735 rptr = sbuf_j; 2736 cols = sbuf_j + rend-rstart + 1; 2737 vals = sbuf_a; 2738 for (i=0; i<rend-rstart; i++){ 2739 row = i + rstart; 2740 ncols = rptr[i+1] - rptr[i]; 2741 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2742 vals += ncols; 2743 cols += ncols; 2744 } 2745 /* insert received matrix entries */ 2746 for (imdex=0; imdex<nrecvs; imdex++){ 2747 rstart = rowrange[recv_rank[imdex]]; 2748 rend = rowrange[recv_rank[imdex]+1]; 2749 rptr = rbuf_j[imdex]; 2750 cols = rbuf_j[imdex] + rend-rstart + 1; 2751 vals = rbuf_a[imdex]; 2752 for (i=0; i<rend-rstart; i++){ 2753 row = i + rstart; 2754 ncols = rptr[i+1] - rptr[i]; 2755 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2756 vals += ncols; 2757 cols += ncols; 2758 } 2759 } 2760 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2761 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2762 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2763 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2764 if (reuse == MAT_INITIAL_MATRIX) { 2765 PetscContainer container; 2766 *matredundant = C; 2767 /* create a supporting struct and attach it to C for reuse */ 2768 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2769 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2770 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2771 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2772 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2773 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2774 2775 redund->nzlocal = nzlocal; 2776 redund->nsends = nsends; 2777 redund->nrecvs = nrecvs; 2778 
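    /* ownership of the send/recv maps and the packed index/value buffers passes to the
       container here; they are released by PetscContainerDestroy_MatRedundant() when the
       redundant matrix is destroyed */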
redund->send_rank = send_rank; 2779 redund->recv_rank = recv_rank; 2780 redund->sbuf_nz = sbuf_nz; 2781 redund->rbuf_nz = rbuf_nz; 2782 redund->sbuf_j = sbuf_j; 2783 redund->sbuf_a = sbuf_a; 2784 redund->rbuf_j = rbuf_j; 2785 redund->rbuf_a = rbuf_a; 2786 2787 redund->Destroy = C->ops->destroy; 2788 C->ops->destroy = MatDestroy_MatRedundant; 2789 } 2790 PetscFunctionReturn(0); 2791 } 2792 2793 #undef __FUNCT__ 2794 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2795 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2796 { 2797 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2798 PetscErrorCode ierr; 2799 PetscInt i,*idxb = 0; 2800 PetscScalar *va,*vb; 2801 Vec vtmp; 2802 2803 PetscFunctionBegin; 2804 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2805 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2806 if (idx) { 2807 for (i=0; i<A->rmap->n; i++) { 2808 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2809 } 2810 } 2811 2812 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2813 if (idx) { 2814 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2815 } 2816 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2817 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2818 2819 for (i=0; i<A->rmap->n; i++){ 2820 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2821 va[i] = vb[i]; 2822 if (idx) idx[i] = a->garray[idxb[i]]; 2823 } 2824 } 2825 2826 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2827 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2828 ierr = PetscFree(idxb);CHKERRQ(ierr); 2829 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2830 PetscFunctionReturn(0); 2831 } 2832 2833 #undef __FUNCT__ 2834 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2835 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2836 { 2837 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2838 PetscErrorCode ierr; 2839 PetscInt i,*idxb = 0; 2840 PetscScalar *va,*vb; 2841 Vec vtmp; 2842 2843 PetscFunctionBegin; 2844 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2845 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2846 if (idx) { 2847 for (i=0; i<A->cmap->n; i++) { 2848 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2849 } 2850 } 2851 2852 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2853 if (idx) { 2854 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2855 } 2856 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2857 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2858 2859 for (i=0; i<A->rmap->n; i++){ 2860 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2861 va[i] = vb[i]; 2862 if (idx) idx[i] = a->garray[idxb[i]]; 2863 } 2864 } 2865 2866 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2867 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2868 ierr = PetscFree(idxb);CHKERRQ(ierr); 2869 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2870 PetscFunctionReturn(0); 2871 } 2872 2873 #undef __FUNCT__ 2874 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2875 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2876 { 2877 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2878 PetscInt n = A->rmap->n; 2879 PetscInt cstart = A->cmap->rstart; 2880 PetscInt *cmap = mat->garray; 2881 PetscInt *diagIdx, *offdiagIdx; 2882 Vec diagV, offdiagV; 2883 PetscScalar *a, *diagA, *offdiagA; 2884 PetscInt r; 2885 PetscErrorCode ierr; 2886 2887 PetscFunctionBegin; 2888 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2889 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2890 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2891 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2892 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2893 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2894 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2895 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2896 for(r = 0; r < n; ++r) { 2897 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2898 a[r] = diagA[r]; 2899 idx[r] = cstart + diagIdx[r]; 2900 } else { 2901 a[r] = offdiagA[r]; 2902 idx[r] = cmap[offdiagIdx[r]]; 2903 } 2904 } 2905 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2906 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2907 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2908 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2909 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2910 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2911 PetscFunctionReturn(0); 2912 } 2913 2914 #undef __FUNCT__ 2915 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2916 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2917 { 2918 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2919 PetscInt n = A->rmap->n; 2920 PetscInt cstart = A->cmap->rstart; 2921 PetscInt *cmap = mat->garray; 2922 PetscInt *diagIdx, *offdiagIdx; 2923 Vec diagV, offdiagV; 2924 PetscScalar *a, *diagA, *offdiagA; 2925 PetscInt r; 2926 PetscErrorCode ierr; 2927 2928 PetscFunctionBegin; 2929 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2930 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2931 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2932 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2933 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2934 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2935 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2936 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2937 for(r = 0; r < n; ++r) { 2938 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2939 a[r] = diagA[r]; 2940 idx[r] = cstart + diagIdx[r]; 2941 } else { 2942 a[r] = offdiagA[r]; 2943 idx[r] = cmap[offdiagIdx[r]]; 2944 } 2945 } 2946 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2947 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2948 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2949 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2950 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2951 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2952 PetscFunctionReturn(0); 2953 } 2954 2955 #undef __FUNCT__ 2956 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2957 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2958 { 2959 PetscErrorCode ierr; 2960 Mat *dummy; 2961 2962 PetscFunctionBegin; 2963 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2964 *newmat = *dummy; 2965 ierr = PetscFree(dummy);CHKERRQ(ierr); 2966 PetscFunctionReturn(0); 2967 } 2968 2969 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2970 /* -------------------------------------------------------------------*/ 2971 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2972 MatGetRow_MPIAIJ, 2973 MatRestoreRow_MPIAIJ, 2974 MatMult_MPIAIJ, 2975 /* 4*/ MatMultAdd_MPIAIJ, 2976 MatMultTranspose_MPIAIJ, 2977 MatMultTransposeAdd_MPIAIJ, 2978 #ifdef PETSC_HAVE_PBGL 2979 MatSolve_MPIAIJ, 2980 #else 2981 0, 2982 #endif 2983 0, 2984 0, 2985 /*10*/ 0, 2986 0, 2987 0, 2988 MatSOR_MPIAIJ, 2989 
MatTranspose_MPIAIJ, 2990 /*15*/ MatGetInfo_MPIAIJ, 2991 MatEqual_MPIAIJ, 2992 MatGetDiagonal_MPIAIJ, 2993 MatDiagonalScale_MPIAIJ, 2994 MatNorm_MPIAIJ, 2995 /*20*/ MatAssemblyBegin_MPIAIJ, 2996 MatAssemblyEnd_MPIAIJ, 2997 MatSetOption_MPIAIJ, 2998 MatZeroEntries_MPIAIJ, 2999 /*24*/ MatZeroRows_MPIAIJ, 3000 0, 3001 #ifdef PETSC_HAVE_PBGL 3002 0, 3003 #else 3004 0, 3005 #endif 3006 0, 3007 0, 3008 /*29*/ MatSetUpPreallocation_MPIAIJ, 3009 #ifdef PETSC_HAVE_PBGL 3010 0, 3011 #else 3012 0, 3013 #endif 3014 0, 3015 0, 3016 0, 3017 /*34*/ MatDuplicate_MPIAIJ, 3018 0, 3019 0, 3020 0, 3021 0, 3022 /*39*/ MatAXPY_MPIAIJ, 3023 MatGetSubMatrices_MPIAIJ, 3024 MatIncreaseOverlap_MPIAIJ, 3025 MatGetValues_MPIAIJ, 3026 MatCopy_MPIAIJ, 3027 /*44*/ MatGetRowMax_MPIAIJ, 3028 MatScale_MPIAIJ, 3029 0, 3030 0, 3031 MatZeroRowsColumns_MPIAIJ, 3032 /*49*/ MatSetBlockSize_MPIAIJ, 3033 0, 3034 0, 3035 0, 3036 0, 3037 /*54*/ MatFDColoringCreate_MPIAIJ, 3038 0, 3039 MatSetUnfactored_MPIAIJ, 3040 MatPermute_MPIAIJ, 3041 0, 3042 /*59*/ MatGetSubMatrix_MPIAIJ, 3043 MatDestroy_MPIAIJ, 3044 MatView_MPIAIJ, 3045 0, 3046 0, 3047 /*64*/ 0, 3048 0, 3049 0, 3050 0, 3051 0, 3052 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3053 MatGetRowMinAbs_MPIAIJ, 3054 0, 3055 MatSetColoring_MPIAIJ, 3056 #if defined(PETSC_HAVE_ADIC) 3057 MatSetValuesAdic_MPIAIJ, 3058 #else 3059 0, 3060 #endif 3061 MatSetValuesAdifor_MPIAIJ, 3062 /*75*/ MatFDColoringApply_AIJ, 3063 0, 3064 0, 3065 0, 3066 0, 3067 /*80*/ 0, 3068 0, 3069 0, 3070 /*83*/ MatLoad_MPIAIJ, 3071 0, 3072 0, 3073 0, 3074 0, 3075 0, 3076 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3077 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3078 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3079 MatPtAP_Basic, 3080 MatPtAPSymbolic_MPIAIJ, 3081 /*94*/ MatPtAPNumeric_MPIAIJ, 3082 0, 3083 0, 3084 0, 3085 0, 3086 /*99*/ 0, 3087 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3088 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3089 MatConjugate_MPIAIJ, 3090 0, 3091 /*104*/MatSetValuesRow_MPIAIJ, 3092 MatRealPart_MPIAIJ, 3093 MatImaginaryPart_MPIAIJ, 3094 0, 3095 0, 3096 /*109*/0, 3097 MatGetRedundantMatrix_MPIAIJ, 3098 MatGetRowMin_MPIAIJ, 3099 0, 3100 0, 3101 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 3102 0, 3103 0, 3104 0, 3105 0, 3106 /*119*/0, 3107 0, 3108 0, 3109 0, 3110 MatGetMultiProcBlock_MPIAIJ, 3111 /*124*/MatFindNonZeroRows_MPIAIJ, 3112 MatGetColumnNorms_MPIAIJ, 3113 0, 3114 0, 3115 MatGetSubMatricesParallel_MPIAIJ 3116 }; 3117 3118 /* ----------------------------------------------------------------------------------------*/ 3119 3120 EXTERN_C_BEGIN 3121 #undef __FUNCT__ 3122 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3123 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3124 { 3125 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3126 PetscErrorCode ierr; 3127 3128 PetscFunctionBegin; 3129 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3130 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3131 PetscFunctionReturn(0); 3132 } 3133 EXTERN_C_END 3134 3135 EXTERN_C_BEGIN 3136 #undef __FUNCT__ 3137 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3138 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3139 { 3140 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3141 PetscErrorCode ierr; 3142 3143 PetscFunctionBegin; 3144 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3145 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3146 PetscFunctionReturn(0); 3147 } 3148 EXTERN_C_END 3149 3150 EXTERN_C_BEGIN 3151 #undef __FUNCT__ 3152 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3153 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 3154 { 3155 Mat_MPIAIJ *b; 3156 PetscErrorCode ierr; 3157 PetscInt i; 3158 3159 PetscFunctionBegin; 3160 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3161 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3162 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3163 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3164 3165 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3166 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3167 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3168 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3169 if (d_nnz) { 3170 for (i=0; i<B->rmap->n; i++) { 3171 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3172 } 3173 } 3174 if (o_nnz) { 3175 for (i=0; i<B->rmap->n; i++) { 3176 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3177 } 3178 } 3179 b = (Mat_MPIAIJ*)B->data; 3180 3181 if (!B->preallocated) { 3182 /* Explicitly create 2 MATSEQAIJ matrices. */ 3183 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3184 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3185 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3186 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3187 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3188 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3189 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3190 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3191 } 3192 3193 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3194 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3195 B->preallocated = PETSC_TRUE; 3196 PetscFunctionReturn(0); 3197 } 3198 EXTERN_C_END 3199 3200 #undef __FUNCT__ 3201 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3202 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3203 { 3204 Mat mat; 3205 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3206 PetscErrorCode ierr; 3207 3208 PetscFunctionBegin; 3209 *newmat = 0; 3210 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3211 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3212 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3213 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3214 a = (Mat_MPIAIJ*)mat->data; 3215 3216 mat->factortype = matin->factortype; 3217 mat->rmap->bs = matin->rmap->bs; 3218 mat->assembled = PETSC_TRUE; 3219 mat->insertmode = NOT_SET_VALUES; 3220 mat->preallocated = PETSC_TRUE; 3221 3222 a->size = oldmat->size; 3223 a->rank = oldmat->rank; 3224 a->donotstash = oldmat->donotstash; 3225 a->roworiented = oldmat->roworiented; 3226 a->rowindices = 0; 3227 a->rowvalues = 0; 3228 a->getrowactive = PETSC_FALSE; 3229 3230 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3231 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3232 3233 if (oldmat->colmap) { 3234 #if defined (PETSC_USE_CTABLE) 3235 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3236 #else 3237 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3238 ierr = 
PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3239 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3240 #endif 3241 } else a->colmap = 0; 3242 if (oldmat->garray) { 3243 PetscInt len; 3244 len = oldmat->B->cmap->n; 3245 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3246 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3247 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3248 } else a->garray = 0; 3249 3250 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3251 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3252 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3253 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3254 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3255 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3256 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3257 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3258 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3259 *newmat = mat; 3260 PetscFunctionReturn(0); 3261 } 3262 3263 3264 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "MatLoad_MPIAIJ" 3267 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3268 { 3269 PetscScalar *vals,*svals; 3270 MPI_Comm comm = ((PetscObject)viewer)->comm; 3271 PetscErrorCode ierr; 3272 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3273 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3274 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3275 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3276 PetscInt cend,cstart,n,*rowners,sizesset=1; 3277 int fd; 3278 3279 PetscFunctionBegin; 3280 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3281 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3282 if (!rank) { 3283 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3284 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3285 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3286 } 3287 3288 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3289 3290 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3291 M = header[1]; N = header[2]; 3292 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3293 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3294 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3295 3296 /* If global sizes are set, check if they are consistent with that given in the file */ 3297 if (sizesset) { 3298 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3299 } 3300 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3301 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3302 3303 /* determine ownership of all rows */ 3304 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3305 else m = newMat->rmap->n; /* Set by user */ 3306 3307 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3308 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3309 3310 /* First process needs enough room for process with most rows */ 3311 if (!rank) { 3312 mmax = rowners[1]; 3313 for (i=2; i<size; i++) { 3314 mmax = PetscMax(mmax,rowners[i]); 3315 } 3316 } else mmax = m; 3317 3318 rowners[0] = 0; 3319 for (i=2; i<=size; i++) { 3320 rowners[i] += rowners[i-1]; 3321 } 3322 rstart = rowners[rank]; 3323 rend = rowners[rank+1]; 3324 3325 /* distribute row lengths to all processors */ 3326 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3327 if (!rank) { 3328 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3329 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3330 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3331 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3332 for (j=0; j<m; j++) { 3333 procsnz[0] += ourlens[j]; 3334 } 3335 for (i=1; i<size; i++) { 3336 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3337 /* calculate the number of nonzeros on each processor */ 3338 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3339 procsnz[i] += rowlengths[j]; 3340 } 3341 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3342 } 3343 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3344 } else { 3345 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3346 } 3347 3348 if (!rank) { 3349 /* determine max buffer needed and allocate it */ 3350 maxnz = 0; 3351 for (i=0; i<size; i++) { 3352 maxnz = PetscMax(maxnz,procsnz[i]); 3353 } 3354 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3355 3356 /* read in my part of the matrix column indices */ 3357 nz = procsnz[0]; 3358 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3359 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3360 3361 /* read in every one elses and ship off */ 3362 for (i=1; i<size; i++) { 3363 nz = procsnz[i]; 3364 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3365 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3366 } 3367 ierr = PetscFree(cols);CHKERRQ(ierr); 3368 } else { 3369 /* determine buffer space needed for message */ 3370 nz = 0; 3371 for (i=0; i<m; i++) { 3372 nz += ourlens[i]; 3373 } 3374 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3375 3376 /* receive message of column indices*/ 3377 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3378 } 3379 3380 /* determine column ownership if matrix is not square */ 3381 if (N != M) { 3382 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3383 else n = newMat->cmap->n; 3384 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3385 cstart = cend - n; 3386 } else { 3387 cstart = rstart; 3388 cend = rend; 3389 n = cend - cstart; 3390 } 3391 3392 /* loop over local rows, determining number of off diagonal entries */ 3393 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3394 jj = 0; 3395 for (i=0; i<m; i++) { 3396 for (j=0; j<ourlens[i]; j++) { 3397 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3398 jj++; 3399 } 3400 } 3401 3402 for (i=0; i<m; i++) { 3403 ourlens[i] -= offlens[i]; 3404 } 3405 if (!sizesset) { 3406 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3407 } 3408 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3409 3410 for (i=0; i<m; i++) { 3411 ourlens[i] += offlens[i]; 3412 } 3413 3414 if (!rank) { 3415 
ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3416 3417 /* read in my part of the matrix numerical values */ 3418 nz = procsnz[0]; 3419 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3420 3421 /* insert into matrix */ 3422 jj = rstart; 3423 smycols = mycols; 3424 svals = vals; 3425 for (i=0; i<m; i++) { 3426 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3427 smycols += ourlens[i]; 3428 svals += ourlens[i]; 3429 jj++; 3430 } 3431 3432 /* read in other processors and ship out */ 3433 for (i=1; i<size; i++) { 3434 nz = procsnz[i]; 3435 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3436 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3437 } 3438 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3439 } else { 3440 /* receive numeric values */ 3441 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3442 3443 /* receive message of values*/ 3444 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3445 3446 /* insert into matrix */ 3447 jj = rstart; 3448 smycols = mycols; 3449 svals = vals; 3450 for (i=0; i<m; i++) { 3451 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3452 smycols += ourlens[i]; 3453 svals += ourlens[i]; 3454 jj++; 3455 } 3456 } 3457 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3458 ierr = PetscFree(vals);CHKERRQ(ierr); 3459 ierr = PetscFree(mycols);CHKERRQ(ierr); 3460 ierr = PetscFree(rowners);CHKERRQ(ierr); 3461 3462 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3463 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3464 PetscFunctionReturn(0); 3465 } 3466 3467 #undef __FUNCT__ 3468 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3469 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3470 { 3471 PetscErrorCode ierr; 3472 IS iscol_local; 3473 PetscInt csize; 3474 3475 PetscFunctionBegin; 3476 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3477 if (call == MAT_REUSE_MATRIX) { 3478 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3479 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3480 } else { 3481 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3482 } 3483 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3484 if (call == MAT_INITIAL_MATRIX) { 3485 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3486 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3487 } 3488 PetscFunctionReturn(0); 3489 } 3490 3491 #undef __FUNCT__ 3492 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3493 /* 3494 Not great since it makes two copies of the submatrix, first an SeqAIJ 3495 in local and then by concatenating the local matrices the end result. 3496 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3497 3498 Note: This requires a sequential iscol with all indices. 
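    For example, MatGetSubMatrix_MPIAIJ() above gathers the parallel iscol into such a
    sequential index set with ISAllGather() before calling this routine, and composes it
    on the result under the name "ISAllGather" so that a later MAT_REUSE_MATRIX call can
    retrieve it.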
3499 */ 3500 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3501 { 3502 PetscErrorCode ierr; 3503 PetscMPIInt rank,size; 3504 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3505 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3506 Mat *local,M,Mreuse; 3507 MatScalar *vwork,*aa; 3508 MPI_Comm comm = ((PetscObject)mat)->comm; 3509 Mat_SeqAIJ *aij; 3510 3511 3512 PetscFunctionBegin; 3513 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3514 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3515 3516 if (call == MAT_REUSE_MATRIX) { 3517 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3518 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3519 local = &Mreuse; 3520 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3521 } else { 3522 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3523 Mreuse = *local; 3524 ierr = PetscFree(local);CHKERRQ(ierr); 3525 } 3526 3527 /* 3528 m - number of local rows 3529 n - number of columns (same on all processors) 3530 rstart - first row in new global matrix generated 3531 */ 3532 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3533 if (call == MAT_INITIAL_MATRIX) { 3534 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3535 ii = aij->i; 3536 jj = aij->j; 3537 3538 /* 3539 Determine the number of non-zeros in the diagonal and off-diagonal 3540 portions of the matrix in order to do correct preallocation 3541 */ 3542 3543 /* first get start and end of "diagonal" columns */ 3544 if (csize == PETSC_DECIDE) { 3545 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3546 if (mglobal == n) { /* square matrix */ 3547 nlocal = m; 3548 } else { 3549 nlocal = n/size + ((n % size) > rank); 3550 } 3551 } else { 3552 nlocal = csize; 3553 } 3554 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3555 rstart = rend - nlocal; 3556 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3557 3558 /* next, compute all the lengths */ 3559 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3560 olens = dlens + m; 3561 for (i=0; i<m; i++) { 3562 jend = ii[i+1] - ii[i]; 3563 olen = 0; 3564 dlen = 0; 3565 for (j=0; j<jend; j++) { 3566 if (*jj < rstart || *jj >= rend) olen++; 3567 else dlen++; 3568 jj++; 3569 } 3570 olens[i] = olen; 3571 dlens[i] = dlen; 3572 } 3573 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3574 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3575 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3576 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3577 ierr = PetscFree(dlens);CHKERRQ(ierr); 3578 } else { 3579 PetscInt ml,nl; 3580 3581 M = *newmat; 3582 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3583 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3584 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3585 /* 3586 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3587 rather than the slower MatSetValues(). 
3588   */
3589     M->was_assembled = PETSC_TRUE;
3590     M->assembled     = PETSC_FALSE;
3591   }
3592   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3593   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3594   ii  = aij->i;
3595   jj  = aij->j;
3596   aa  = aij->a;
3597   for (i=0; i<m; i++) {
3598     row = rstart + i;
3599     nz  = ii[i+1] - ii[i];
3600     cwork = jj; jj += nz;
3601     vwork = aa; aa += nz;
3602     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3603   }
3604 
3605   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3606   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3607   *newmat = M;
3608 
3609   /* save submatrix used in processor for next request */
3610   if (call == MAT_INITIAL_MATRIX) {
3611     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3612     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3613   }
3614 
3615   PetscFunctionReturn(0);
3616 }
3617 
3618 EXTERN_C_BEGIN
3619 #undef __FUNCT__
3620 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3621 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3622 {
3623   PetscInt       m,cstart,cend,j,nnz,i,d;
3624   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3625   const PetscInt *JJ;
3626   PetscScalar    *values;
3627   PetscErrorCode ierr;
3628 
3629   PetscFunctionBegin;
3630   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3631 
3632   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3633   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3634   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3635   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3636   m      = B->rmap->n;
3637   cstart = B->cmap->rstart;
3638   cend   = B->cmap->rend;
3639   rstart = B->rmap->rstart;
3640 
3641   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3642 
3643 #if defined(PETSC_USE_DEBUG)
3644   for (i=0; i<m; i++) {
3645     nnz = Ii[i+1] - Ii[i];
3646     JJ  = J + Ii[i];
3647     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3648     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3649     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3650   }
3651 #endif
3652 
3653   for (i=0; i<m; i++) {
3654     nnz     = Ii[i+1] - Ii[i];
3655     JJ      = J + Ii[i];
3656     nnz_max = PetscMax(nnz_max,nnz);
3657     d       = 0;
3658     for (j=0; j<nnz; j++) {
3659       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3660     }
3661     d_nnz[i] = d;
3662     o_nnz[i] = nnz - d;
3663   }
3664   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3665   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3666 
3667   if (v) values = (PetscScalar*)v;
3668   else {
3669     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3670     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3671   }
3672 
3673   for (i=0; i<m; i++) {
3674     ii   = i + rstart;
3675     nnz  = Ii[i+1] - Ii[i];
3676     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3677 } 3678 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3679 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3680 3681 if (!v) { 3682 ierr = PetscFree(values);CHKERRQ(ierr); 3683 } 3684 PetscFunctionReturn(0); 3685 } 3686 EXTERN_C_END 3687 3688 #undef __FUNCT__ 3689 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3690 /*@ 3691 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3692 (the default parallel PETSc format). 3693 3694 Collective on MPI_Comm 3695 3696 Input Parameters: 3697 + B - the matrix 3698 . i - the indices into j for the start of each local row (starts with zero) 3699 . j - the column indices for each local row (starts with zero) 3700 - v - optional values in the matrix 3701 3702 Level: developer 3703 3704 Notes: 3705 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3706 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3707 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3708 3709 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3710 3711 The format which is used for the sparse matrix input, is equivalent to a 3712 row-major ordering.. i.e for the following matrix, the input data expected is 3713 as shown: 3714 3715 1 0 0 3716 2 0 3 P0 3717 ------- 3718 4 5 6 P1 3719 3720 Process0 [P0]: rows_owned=[0,1] 3721 i = {0,1,3} [size = nrow+1 = 2+1] 3722 j = {0,0,2} [size = nz = 6] 3723 v = {1,2,3} [size = nz = 6] 3724 3725 Process1 [P1]: rows_owned=[2] 3726 i = {0,3} [size = nrow+1 = 1+1] 3727 j = {0,1,2} [size = nz = 6] 3728 v = {4,5,6} [size = nz = 6] 3729 3730 .keywords: matrix, aij, compressed row, sparse, parallel 3731 3732 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3733 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3734 @*/ 3735 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3736 { 3737 PetscErrorCode ierr; 3738 3739 PetscFunctionBegin; 3740 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3741 PetscFunctionReturn(0); 3742 } 3743 3744 #undef __FUNCT__ 3745 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3746 /*@C 3747 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3748 (the default parallel PETSc format). For good matrix assembly performance 3749 the user should preallocate the matrix storage by setting the parameters 3750 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3751 performance can be increased by more than a factor of 50. 3752 3753 Collective on MPI_Comm 3754 3755 Input Parameters: 3756 + A - the matrix 3757 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3758 (same value is used for all local rows) 3759 . d_nnz - array containing the number of nonzeros in the various rows of the 3760 DIAGONAL portion of the local submatrix (possibly different for each row) 3761 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3762 The size of this array is equal to the number of local rows, i.e 'm'. 3763 You must leave room for the diagonal entry even if it is zero. 3764 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3765          submatrix (same value is used for all local rows).
3766 -  o_nnz - array containing the number of nonzeros in the various rows of the
3767          OFF-DIAGONAL portion of the local submatrix (possibly different for
3768          each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3769          structure. The size of this array is equal to the number
3770          of local rows, i.e. 'm'.
3771 
3772    If the *_nnz parameter is given then the *_nz parameter is ignored.
3773 
3774    The AIJ format (also called the Yale sparse matrix format or
3775    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3776    storage. The stored row and column indices begin with zero.
3777    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3778 
3779    The parallel matrix is partitioned such that the first m0 rows belong to
3780    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3781    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3782 
3783    The DIAGONAL portion of the local submatrix of a processor can be defined
3784    as the submatrix which is obtained by extracting the part corresponding to
3785    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3786    first row that belongs to the processor, r2 is the last row belonging to
3787    this processor, and c1-c2 is the range of indices of the local part of a
3788    vector suitable for applying the matrix to. This is an m x n matrix. In the
3789    common case of a square matrix, the row and column ranges are the same and
3790    the DIAGONAL part is also square. The remaining portion of the local
3791    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3792 
3793    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3794 
3795    You can call MatGetInfo() to get information on how effective the preallocation was;
3796    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
3797    You can also run with the option -info and look for messages with the string
3798    malloc in them to see if additional memory allocation was needed.
3799 
3800    Example usage:
3801 
3802    Consider the following 8x8 matrix with 34 non-zero values, that is
3803    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3804    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3805    as follows:
3806 
3807 .vb
3808             1  2  0  |  0  3  0  |  0  4
3809     Proc0   0  5  6  |  7  0  0  |  8  0
3810             9  0 10  | 11  0  0  | 12  0
3811     -------------------------------------
3812            13  0 14  | 15 16 17  |  0  0
3813     Proc1   0 18  0  | 19 20 21  |  0  0
3814             0  0  0  | 22 23  0  | 24  0
3815     -------------------------------------
3816     Proc2  25 26 27  |  0  0 28  | 29  0
3817            30  0  0  | 31 32 33  |  0 34
3818 .ve
3819 
3820    This can be represented as a collection of submatrices as:
3821 
3822 .vb
3823       A B C
3824       D E F
3825       G H I
3826 .ve
3827 
3828    Where the submatrices A,B,C are owned by proc0, D,E,F are
3829    owned by proc1, G,H,I are owned by proc2.
3830 
3831    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3832    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3833    The 'M','N' parameters are 8,8, and have the same values on all procs.
3834 
3835    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3836    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3837    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3838    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3839    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3840    matrix, and [DF] as another SeqAIJ matrix.
3841 
3842    When d_nz, o_nz parameters are specified, d_nz storage elements are
3843    allocated for every row of the local diagonal submatrix, and o_nz
3844    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3845    One way to choose d_nz and o_nz is to use the max nonzeros per local
3846    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3847    In this case, the values of d_nz,o_nz are:
3848 .vb
3849      proc0 : d_nz = 2, o_nz = 2
3850      proc1 : d_nz = 3, o_nz = 2
3851      proc2 : d_nz = 1, o_nz = 4
3852 .ve
3853    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3854    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3855    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3856    34 values.
3857 
3858    When d_nnz, o_nnz parameters are specified, the storage is specified
3859    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3860    In the above case the values for d_nnz,o_nnz are:
3861 .vb
3862      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3863      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3864      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3865 .ve
3866    Here the space allocated is the sum of all the above values, i.e. 34, and
3867    hence pre-allocation is perfect.
3868 
3869    Level: intermediate
3870 
3871 .keywords: matrix, aij, compressed row, sparse, parallel
3872 
3873 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3874           MPIAIJ, MatGetInfo()
3875 @*/
3876 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3877 {
3878   PetscErrorCode ierr;
3879 
3880   PetscFunctionBegin;
3881   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3882   PetscFunctionReturn(0);
3883 }
3884 
3885 #undef __FUNCT__
3886 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3887 /*@
3888    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3889    in standard CSR format.
3890 
3891    Collective on MPI_Comm
3892 
3893    Input Parameters:
3894 +  comm - MPI communicator
3895 .  m - number of local rows (Cannot be PETSC_DECIDE)
3896 .  n - This value should be the same as the local size used in creating the
3897        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3898        calculated if N is given) For square matrices n is almost always m.
3899 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3900 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3901 .  i - row indices
3902 .  j - column indices
3903 -  a - matrix values
3904 
3905    Output Parameter:
3906 .  mat - the matrix
3907 
3908    Level: intermediate
3909 
3910    Notes:
3911    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3912    thus you CANNOT change the matrix entries by changing the values of a[] after you have
3913    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3914 
3915    The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3916 
3917    The format which is used for the sparse matrix input is equivalent to a
3918    row-major ordering,
i.e for the following matrix, the input data expected is 3919 as shown: 3920 3921 1 0 0 3922 2 0 3 P0 3923 ------- 3924 4 5 6 P1 3925 3926 Process0 [P0]: rows_owned=[0,1] 3927 i = {0,1,3} [size = nrow+1 = 2+1] 3928 j = {0,0,2} [size = nz = 6] 3929 v = {1,2,3} [size = nz = 6] 3930 3931 Process1 [P1]: rows_owned=[2] 3932 i = {0,3} [size = nrow+1 = 1+1] 3933 j = {0,1,2} [size = nz = 6] 3934 v = {4,5,6} [size = nz = 6] 3935 3936 .keywords: matrix, aij, compressed row, sparse, parallel 3937 3938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3939 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3940 @*/ 3941 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3942 { 3943 PetscErrorCode ierr; 3944 3945 PetscFunctionBegin; 3946 if (i[0]) { 3947 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3948 } 3949 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3950 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3951 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3952 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3953 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3954 PetscFunctionReturn(0); 3955 } 3956 3957 #undef __FUNCT__ 3958 #define __FUNCT__ "MatCreateMPIAIJ" 3959 /*@C 3960 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3961 (the default parallel PETSc format). For good matrix assembly performance 3962 the user should preallocate the matrix storage by setting the parameters 3963 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3964 performance can be increased by more than a factor of 50. 3965 3966 Collective on MPI_Comm 3967 3968 Input Parameters: 3969 + comm - MPI communicator 3970 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3971 This value should be the same as the local size used in creating the 3972 y vector for the matrix-vector product y = Ax. 3973 . n - This value should be the same as the local size used in creating the 3974 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3975 calculated if N is given) For square matrices n is almost always m. 3976 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3977 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3978 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3979 (same value is used for all local rows) 3980 . d_nnz - array containing the number of nonzeros in the various rows of the 3981 DIAGONAL portion of the local submatrix (possibly different for each row) 3982 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3983 The size of this array is equal to the number of local rows, i.e 'm'. 3984 You must leave room for the diagonal entry even if it is zero. 3985 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3986 submatrix (same value is used for all local rows). 3987 - o_nnz - array containing the number of nonzeros in the various rows of the 3988 OFF-DIAGONAL portion of the local submatrix (possibly different for 3989 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3990 structure. The size of this array is equal to the number 3991 of local rows, i.e 'm'. 
3992 
3993    Output Parameter:
3994 .  A - the matrix
3995 
3996    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3997    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3998    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3999 
4000    Notes:
4001    If the *_nnz parameter is given then the *_nz parameter is ignored.
4002 
4003    The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4004    processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4005    storage requirements for this matrix.
4006 
4007    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4008    processor then it must be used on all processors that share the object for
4009    that argument.
4010 
4011    The user MUST specify either the local or global matrix dimensions
4012    (possibly both).
4013 
4014    The parallel matrix is partitioned across processors such that the
4015    first m0 rows belong to process 0, the next m1 rows belong to
4016    process 1, the next m2 rows belong to process 2, etc., where
4017    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4018    values corresponding to an [m x N] submatrix.
4019 
4020    The columns are logically partitioned with the n0 columns belonging
4021    to the 0th partition, the next n1 columns belonging to the next
4022    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4023 
4024    The DIAGONAL portion of the local submatrix on any given processor
4025    is the submatrix corresponding to the rows and columns m,n
4026    corresponding to the given processor, i.e. the diagonal matrix on
4027    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4028    etc. The remaining portion of the local submatrix [m x (N-n)]
4029    constitutes the OFF-DIAGONAL portion. The example below better
4030    illustrates this concept.
4031 
4032    For a square global matrix we define each processor's diagonal portion
4033    to be its local rows and the corresponding columns (a square submatrix);
4034    each processor's off-diagonal portion encompasses the remainder of the
4035    local matrix (a rectangular submatrix).
4036 
4037    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4038 
4039    When calling this routine with a single process communicator, a matrix of
4040    type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4041    type of communicator, use the construction mechanism:
4042      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4043 
4044    By default, this format uses inodes (identical nodes) when possible.
4045    We search for consecutive rows with the same nonzero structure, thereby
4046    reusing matrix information to achieve increased efficiency.
4047 
4048    Options Database Keys:
4049 +  -mat_no_inode - Do not use inodes
4050 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4051 -  -mat_aij_oneindex - Internally use indexing starting at 1
4052         rather than 0.  Note that when calling MatSetValues(),
4053         the user still MUST index entries starting at 0!
4054 
4055 
4056    Example usage:
4057 
4058    Consider the following 8x8 matrix with 34 non-zero values, that is
4059    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4060    proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
4061    as follows:
4062 
4063 .vb
4064             1  2  0  |  0  3  0  |  0  4
4065     Proc0   0  5  6  |  7  0  0  |  8  0
4066             9  0 10  | 11  0  0  | 12  0
4067     -------------------------------------
4068            13  0 14  | 15 16 17  |  0  0
4069     Proc1   0 18  0  | 19 20 21  |  0  0
4070             0  0  0  | 22 23  0  | 24  0
4071     -------------------------------------
4072     Proc2  25 26 27  |  0  0 28  | 29  0
4073            30  0  0  | 31 32 33  |  0 34
4074 .ve
4075 
4076    This can be represented as a collection of submatrices as:
4077 
4078 .vb
4079       A B C
4080       D E F
4081       G H I
4082 .ve
4083 
4084    Where the submatrices A,B,C are owned by proc0, D,E,F are
4085    owned by proc1, G,H,I are owned by proc2.
4086 
4087    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4088    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4089    The 'M','N' parameters are 8,8, and have the same values on all procs.
4090 
4091    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4092    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4093    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4094    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4095    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4096    matrix, and [DF] as another SeqAIJ matrix.
4097 
4098    When d_nz, o_nz parameters are specified, d_nz storage elements are
4099    allocated for every row of the local diagonal submatrix, and o_nz
4100    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4101    One way to choose d_nz and o_nz is to use the max nonzeros per local
4102    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4103    In this case, the values of d_nz,o_nz are:
4104 .vb
4105      proc0 : d_nz = 2, o_nz = 2
4106      proc1 : d_nz = 3, o_nz = 2
4107      proc2 : d_nz = 1, o_nz = 4
4108 .ve
4109    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4110    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4111    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4112    34 values.
4113 
4114    When d_nnz, o_nnz parameters are specified, the storage is specified
4115    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4116    In the above case the values for d_nnz,o_nnz are:
4117 .vb
4118      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4119      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4120      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4121 .ve
4122    Here the space allocated is the sum of all the above values, i.e. 34, and
4123    hence pre-allocation is perfect.
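
   As a minimal sketch of the recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation()
   sequence mentioned above (comm, m, n, M, N, d_nnz and o_nnz are illustrative placeholders
   supplied by the caller, not values defined by this routine):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     /* set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() */
.ve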
4124 4125 Level: intermediate 4126 4127 .keywords: matrix, aij, compressed row, sparse, parallel 4128 4129 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4130 MPIAIJ, MatCreateMPIAIJWithArrays() 4131 @*/ 4132 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4133 { 4134 PetscErrorCode ierr; 4135 PetscMPIInt size; 4136 4137 PetscFunctionBegin; 4138 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4139 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4140 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4141 if (size > 1) { 4142 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4143 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4144 } else { 4145 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4146 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4147 } 4148 PetscFunctionReturn(0); 4149 } 4150 4151 #undef __FUNCT__ 4152 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4153 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4154 { 4155 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4156 4157 PetscFunctionBegin; 4158 *Ad = a->A; 4159 *Ao = a->B; 4160 *colmap = a->garray; 4161 PetscFunctionReturn(0); 4162 } 4163 4164 #undef __FUNCT__ 4165 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4166 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4167 { 4168 PetscErrorCode ierr; 4169 PetscInt i; 4170 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4171 4172 PetscFunctionBegin; 4173 if (coloring->ctype == IS_COLORING_GLOBAL) { 4174 ISColoringValue *allcolors,*colors; 4175 ISColoring ocoloring; 4176 4177 /* set coloring for diagonal portion */ 4178 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4179 4180 /* set coloring for off-diagonal portion */ 4181 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4182 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4183 for (i=0; i<a->B->cmap->n; i++) { 4184 colors[i] = allcolors[a->garray[i]]; 4185 } 4186 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4187 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4188 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4189 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4190 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4191 ISColoringValue *colors; 4192 PetscInt *larray; 4193 ISColoring ocoloring; 4194 4195 /* set coloring for diagonal portion */ 4196 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4197 for (i=0; i<a->A->cmap->n; i++) { 4198 larray[i] = i + A->cmap->rstart; 4199 } 4200 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4201 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4202 for (i=0; i<a->A->cmap->n; i++) { 4203 colors[i] = coloring->colors[larray[i]]; 4204 } 4205 ierr = PetscFree(larray);CHKERRQ(ierr); 4206 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4207 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4208 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4209 4210 /* set coloring for off-diagonal portion */ 4211 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4212 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4213 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4214 for (i=0; i<a->B->cmap->n; i++) { 4215 colors[i] = coloring->colors[larray[i]]; 4216 } 4217 ierr = PetscFree(larray);CHKERRQ(ierr); 4218 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4219 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4220 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4221 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4222 4223 PetscFunctionReturn(0); 4224 } 4225 4226 #if defined(PETSC_HAVE_ADIC) 4227 #undef __FUNCT__ 4228 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4229 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4230 { 4231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4232 PetscErrorCode ierr; 4233 4234 PetscFunctionBegin; 4235 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4236 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4237 PetscFunctionReturn(0); 4238 } 4239 #endif 4240 4241 #undef __FUNCT__ 4242 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4243 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4244 { 4245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4246 PetscErrorCode ierr; 4247 4248 PetscFunctionBegin; 4249 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4250 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4251 PetscFunctionReturn(0); 4252 } 4253 4254 #undef __FUNCT__ 4255 #define __FUNCT__ "MatMerge" 4256 /*@ 4257 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4258 matrices from each processor 4259 4260 Collective on MPI_Comm 4261 4262 Input Parameters: 4263 + comm - the communicators the parallel matrix will live on 4264 . inmat - the input sequential matrices 4265 . n - number of local columns (or PETSC_DECIDE) 4266 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4267 4268 Output Parameter: 4269 . outmat - the parallel matrix generated 4270 4271 Level: advanced 4272 4273 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
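
   A minimal usage sketch (seqA is an illustrative placeholder for the per-process
   MATSEQAIJ matrix each rank has already assembled):
.vb
     Mat seqA,mpiA;
     /* ... each process builds and assembles its own seqA ... */
     MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);
.ve
   Note that, as the code below shows, the input matrix inmat is destroyed by this routine.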
4274 4275 @*/ 4276 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4277 { 4278 PetscErrorCode ierr; 4279 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4280 PetscInt *indx; 4281 PetscScalar *values; 4282 4283 PetscFunctionBegin; 4284 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4285 if (scall == MAT_INITIAL_MATRIX){ 4286 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4287 if (n == PETSC_DECIDE){ 4288 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4289 } 4290 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4291 rstart -= m; 4292 4293 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4294 for (i=0;i<m;i++) { 4295 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4296 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4297 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4298 } 4299 /* This routine will ONLY return MPIAIJ type matrix */ 4300 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4301 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4302 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4303 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4304 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4305 4306 } else if (scall == MAT_REUSE_MATRIX){ 4307 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4308 } else { 4309 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4310 } 4311 4312 for (i=0;i<m;i++) { 4313 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4314 Ii = i + rstart; 4315 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4316 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4317 } 4318 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4319 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4320 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4321 4322 PetscFunctionReturn(0); 4323 } 4324 4325 #undef __FUNCT__ 4326 #define __FUNCT__ "MatFileSplit" 4327 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4328 { 4329 PetscErrorCode ierr; 4330 PetscMPIInt rank; 4331 PetscInt m,N,i,rstart,nnz; 4332 size_t len; 4333 const PetscInt *indx; 4334 PetscViewer out; 4335 char *name; 4336 Mat B; 4337 const PetscScalar *values; 4338 4339 PetscFunctionBegin; 4340 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4341 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4342 /* Should this be the type of the diagonal block of A? 
*/ 4343 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4344 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4345 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4346 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4347 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4348 for (i=0;i<m;i++) { 4349 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4350 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4351 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4352 } 4353 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4354 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4355 4356 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4357 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4358 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4359 sprintf(name,"%s.%d",outfile,rank); 4360 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4361 ierr = PetscFree(name); 4362 ierr = MatView(B,out);CHKERRQ(ierr); 4363 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4364 ierr = MatDestroy(&B);CHKERRQ(ierr); 4365 PetscFunctionReturn(0); 4366 } 4367 4368 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4369 #undef __FUNCT__ 4370 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4371 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4372 { 4373 PetscErrorCode ierr; 4374 Mat_Merge_SeqsToMPI *merge; 4375 PetscContainer container; 4376 4377 PetscFunctionBegin; 4378 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4379 if (container) { 4380 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4381 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4382 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4383 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4384 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4385 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4386 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4387 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4388 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4389 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4390 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4391 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4392 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4393 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4394 ierr = PetscFree(merge);CHKERRQ(ierr); 4395 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4396 } 4397 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4398 PetscFunctionReturn(0); 4399 } 4400 4401 #include <../src/mat/utils/freespace.h> 4402 #include <petscbt.h> 4403 4404 #undef __FUNCT__ 4405 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4406 /*@C 4407 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4408 matrices from each processor 4409 4410 Collective on MPI_Comm 4411 4412 Input Parameters: 4413 + comm - the communicators the parallel matrix will live on 4414 . seqmat - the input sequential matrices 4415 . m - number of local rows (or PETSC_DECIDE) 4416 . n - number of local columns (or PETSC_DECIDE) 4417 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4418 4419 Output Parameter: 4420 . mpimat - the parallel matrix generated 4421 4422 Level: advanced 4423 4424 Notes: 4425 The dimensions of the sequential matrix in each processor MUST be the same. 4426 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4427 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 4428 @*/ 4429 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4430 { 4431 PetscErrorCode ierr; 4432 MPI_Comm comm=((PetscObject)mpimat)->comm; 4433 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4434 PetscMPIInt size,rank,taga,*len_s; 4435 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4436 PetscInt proc,m; 4437 PetscInt **buf_ri,**buf_rj; 4438 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4439 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4440 MPI_Request *s_waits,*r_waits; 4441 MPI_Status *status; 4442 MatScalar *aa=a->a; 4443 MatScalar **abuf_r,*ba_i; 4444 Mat_Merge_SeqsToMPI *merge; 4445 PetscContainer container; 4446 4447 PetscFunctionBegin; 4448 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4449 4450 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4451 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4452 4453 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4454 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4455 4456 bi = merge->bi; 4457 bj = merge->bj; 4458 buf_ri = merge->buf_ri; 4459 buf_rj = merge->buf_rj; 4460 4461 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4462 owners = merge->rowmap->range; 4463 len_s = merge->len_s; 4464 4465 /* send and recv matrix values */ 4466 /*-----------------------------*/ 4467 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4468 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4469 4470 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4471 for (proc=0,k=0; proc<size; proc++){ 4472 if (!len_s[proc]) continue; 4473 i = owners[proc]; 4474 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4475 k++; 4476 } 4477 4478 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4479 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4480 ierr = PetscFree(status);CHKERRQ(ierr); 4481 4482 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4483 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4484 4485 /* insert mat values of mpimat */ 4486 /*----------------------------*/ 4487 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4488 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4489 4490 for (k=0; k<merge->nrecv; k++){ 4491 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4492 nrows = *(buf_ri_k[k]); 4493 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4494 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4495 } 4496 4497 /* set values of ba */ 4498 m = merge->rowmap->n; 4499 for (i=0; i<m; i++) { 4500 arow = owners[rank] + i; 4501 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4502 bnzi = bi[i+1] - bi[i]; 4503 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4504 4505 /* add local non-zero vals of this proc's seqmat into ba */ 4506 anzi = ai[arow+1] - ai[arow]; 4507 aj = a->j + ai[arow]; 4508 aa = a->a + ai[arow]; 4509 nextaj = 0; 4510 for (j=0; nextaj<anzi; j++){ 4511 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4512 ba_i[j] += aa[nextaj++]; 4513 } 4514 } 4515 4516 /* add received vals into ba */ 4517 for (k=0; k<merge->nrecv; k++){ /* k-th 
received message */ 4518 /* i-th row */ 4519 if (i == *nextrow[k]) { 4520 anzi = *(nextai[k]+1) - *nextai[k]; 4521 aj = buf_rj[k] + *(nextai[k]); 4522 aa = abuf_r[k] + *(nextai[k]); 4523 nextaj = 0; 4524 for (j=0; nextaj<anzi; j++){ 4525 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4526 ba_i[j] += aa[nextaj++]; 4527 } 4528 } 4529 nextrow[k]++; nextai[k]++; 4530 } 4531 } 4532 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4533 } 4534 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4535 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4536 4537 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4538 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4539 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4540 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4541 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4542 PetscFunctionReturn(0); 4543 } 4544 4545 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4546 4547 #undef __FUNCT__ 4548 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4549 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4550 { 4551 PetscErrorCode ierr; 4552 Mat B_mpi; 4553 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4554 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4555 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4556 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4557 PetscInt len,proc,*dnz,*onz; 4558 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4559 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4560 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4561 MPI_Status *status; 4562 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4563 PetscBT lnkbt; 4564 Mat_Merge_SeqsToMPI *merge; 4565 PetscContainer container; 4566 4567 PetscFunctionBegin; 4568 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4569 4570 /* make sure it is a PETSc comm */ 4571 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4572 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4573 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4574 4575 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4576 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4577 4578 /* determine row ownership */ 4579 /*---------------------------------------------------------*/ 4580 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4581 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4582 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4583 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4584 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4585 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4586 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4587 4588 m = merge->rowmap->n; 4589 M = merge->rowmap->N; 4590 owners = merge->rowmap->range; 4591 4592 /* determine the number of messages to send, their lengths */ 4593 /*---------------------------------------------------------*/ 4594 len_s = merge->len_s; 4595 4596 len = 0; /* length of buf_si[] */ 4597 merge->nsend = 0; 4598 for (proc=0; proc<size; proc++){ 4599 len_si[proc] = 0; 4600 if (proc == rank){ 4601 len_s[proc] = 0; 4602 } else { 4603 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4604 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4605 } 4606 if (len_s[proc]) { 4607 
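      /* [proc] will receive data from this rank: count its nonempty rows here so that the
         i-structure message can be sized as 2*(nrows+1) integers - one row count, nrows
         local row indices, and nrows+1 CSR-style offsets (see the packing loop below) */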
merge->nsend++; 4608 nrows = 0; 4609 for (i=owners[proc]; i<owners[proc+1]; i++){ 4610 if (ai[i+1] > ai[i]) nrows++; 4611 } 4612 len_si[proc] = 2*(nrows+1); 4613 len += len_si[proc]; 4614 } 4615 } 4616 4617 /* determine the number and length of messages to receive for ij-structure */ 4618 /*-------------------------------------------------------------------------*/ 4619 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4620 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4621 4622 /* post the Irecv of j-structure */ 4623 /*-------------------------------*/ 4624 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4625 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4626 4627 /* post the Isend of j-structure */ 4628 /*--------------------------------*/ 4629 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4630 4631 for (proc=0, k=0; proc<size; proc++){ 4632 if (!len_s[proc]) continue; 4633 i = owners[proc]; 4634 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4635 k++; 4636 } 4637 4638 /* receives and sends of j-structure are complete */ 4639 /*------------------------------------------------*/ 4640 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4641 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4642 4643 /* send and recv i-structure */ 4644 /*---------------------------*/ 4645 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4646 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4647 4648 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4649 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4650 for (proc=0,k=0; proc<size; proc++){ 4651 if (!len_s[proc]) continue; 4652 /* form outgoing message for i-structure: 4653 buf_si[0]: nrows to be sent 4654 [1:nrows]: row index (global) 4655 [nrows+1:2*nrows+1]: i-structure index 4656 */ 4657 /*-------------------------------------------*/ 4658 nrows = len_si[proc]/2 - 1; 4659 buf_si_i = buf_si + nrows+1; 4660 buf_si[0] = nrows; 4661 buf_si_i[0] = 0; 4662 nrows = 0; 4663 for (i=owners[proc]; i<owners[proc+1]; i++){ 4664 anzi = ai[i+1] - ai[i]; 4665 if (anzi) { 4666 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4667 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4668 nrows++; 4669 } 4670 } 4671 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4672 k++; 4673 buf_si += len_si[proc]; 4674 } 4675 4676 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4677 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4678 4679 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4680 for (i=0; i<merge->nrecv; i++){ 4681 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4682 } 4683 4684 ierr = PetscFree(len_si);CHKERRQ(ierr); 4685 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4686 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4687 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4688 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4689 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4690 ierr = 
PetscFree(status);CHKERRQ(ierr); 4691 4692 /* compute a local seq matrix in each processor */ 4693 /*----------------------------------------------*/ 4694 /* allocate bi array and free space for accumulating nonzero column info */ 4695 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4696 bi[0] = 0; 4697 4698 /* create and initialize a linked list */ 4699 nlnk = N+1; 4700 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4701 4702 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4703 len = 0; 4704 len = ai[owners[rank+1]] - ai[owners[rank]]; 4705 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4706 current_space = free_space; 4707 4708 /* determine symbolic info for each local row */ 4709 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4710 4711 for (k=0; k<merge->nrecv; k++){ 4712 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4713 nrows = *buf_ri_k[k]; 4714 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4715 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */ 4716 } 4717 4718 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4719 len = 0; 4720 for (i=0;i<m;i++) { 4721 bnzi = 0; 4722 /* add local non-zero cols of this proc's seqmat into lnk */ 4723 arow = owners[rank] + i; 4724 anzi = ai[arow+1] - ai[arow]; 4725 aj = a->j + ai[arow]; 4726 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4727 bnzi += nlnk; 4728 /* add received col data into lnk */ 4729 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4730 if (i == *nextrow[k]) { /* i-th row */ 4731 anzi = *(nextai[k]+1) - *nextai[k]; 4732 aj = buf_rj[k] + *nextai[k]; 4733 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4734 bnzi += nlnk; 4735 nextrow[k]++; nextai[k]++; 4736 } 4737 } 4738 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4739 4740 /* if free space is not available, make more free space */ 4741 if (current_space->local_remaining<bnzi) { 4742 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4743 nspacedouble++; 4744 } 4745 /* copy data into free space, then initialize lnk */ 4746 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4747 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4748 4749 current_space->array += bnzi; 4750 current_space->local_used += bnzi; 4751 current_space->local_remaining -= bnzi; 4752 4753 bi[i+1] = bi[i] + bnzi; 4754 } 4755 4756 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4757 4758 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4759 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4760 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4761 4762 /* create symbolic parallel matrix B_mpi */ 4763 /*---------------------------------------*/ 4764 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4765 if (n==PETSC_DECIDE) { 4766 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4767 } else { 4768 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4769 } 4770 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4771 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4772 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4773 4774 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4775
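  /* The merge data built above (bi, bj and the received i/j buffers) is stashed in a PetscContainer
     attached to B_mpi below, so MatMerge_SeqsToMPINumeric() - and repeated numeric phases on the same
     nonzero pattern - can query it back instead of recomputing the communication pattern */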
B_mpi->assembled = PETSC_FALSE; 4776 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4777 merge->bi = bi; 4778 merge->bj = bj; 4779 merge->buf_ri = buf_ri; 4780 merge->buf_rj = buf_rj; 4781 merge->coi = PETSC_NULL; 4782 merge->coj = PETSC_NULL; 4783 merge->owners_co = PETSC_NULL; 4784 4785 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4786 4787 /* attach the supporting struct to B_mpi for reuse */ 4788 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4789 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4790 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4791 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4792 *mpimat = B_mpi; 4793 4794 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4795 PetscFunctionReturn(0); 4796 } 4797 4798 #undef __FUNCT__ 4799 #define __FUNCT__ "MatMerge_SeqsToMPI" 4800 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4801 { 4802 PetscErrorCode ierr; 4803 4804 PetscFunctionBegin; 4805 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4806 if (scall == MAT_INITIAL_MATRIX){ 4807 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4808 } 4809 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4810 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4811 PetscFunctionReturn(0); 4812 } 4813 4814 #undef __FUNCT__ 4815 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4816 /*@ 4817 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with 4818 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4819 with MatGetSize() 4820 4821 Not Collective 4822 4823 Input Parameters: 4824 + A - the matrix 4825 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4826 4827 Output Parameter: 4828 .
A_loc - the local sequential matrix generated 4829 4830 Level: developer 4831 4832 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4833 4834 @*/ 4835 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4836 { 4837 PetscErrorCode ierr; 4838 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4839 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4840 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4841 MatScalar *aa=a->a,*ba=b->a,*cam; 4842 PetscScalar *ca; 4843 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4844 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4845 PetscBool match; 4846 4847 PetscFunctionBegin; 4848 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4849 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4850 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4851 if (scall == MAT_INITIAL_MATRIX){ 4852 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4853 ci[0] = 0; 4854 for (i=0; i<am; i++){ 4855 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4856 } 4857 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4858 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4859 k = 0; 4860 for (i=0; i<am; i++) { 4861 ncols_o = bi[i+1] - bi[i]; 4862 ncols_d = ai[i+1] - ai[i]; 4863 /* off-diagonal portion of A */ 4864 for (jo=0; jo<ncols_o; jo++) { 4865 col = cmap[*bj]; 4866 if (col >= cstart) break; 4867 cj[k] = col; bj++; 4868 ca[k++] = *ba++; 4869 } 4870 /* diagonal portion of A */ 4871 for (j=0; j<ncols_d; j++) { 4872 cj[k] = cstart + *aj++; 4873 ca[k++] = *aa++; 4874 } 4875 /* off-diagonal portion of A */ 4876 for (j=jo; j<ncols_o; j++) { 4877 cj[k] = cmap[*bj++]; 4878 ca[k++] = *ba++; 4879 } 4880 } 4881 /* put together the new matrix */ 4882 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4883 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4884 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4885 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4886 mat->free_a = PETSC_TRUE; 4887 mat->free_ij = PETSC_TRUE; 4888 mat->nonew = 0; 4889 } else if (scall == MAT_REUSE_MATRIX){ 4890 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4891 ci = mat->i; cj = mat->j; cam = mat->a; 4892 for (i=0; i<am; i++) { 4893 /* off-diagonal portion of A */ 4894 ncols_o = bi[i+1] - bi[i]; 4895 for (jo=0; jo<ncols_o; jo++) { 4896 col = cmap[*bj]; 4897 if (col >= cstart) break; 4898 *cam++ = *ba++; bj++; 4899 } 4900 /* diagonal portion of A */ 4901 ncols_d = ai[i+1] - ai[i]; 4902 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4903 /* off-diagonal portion of A */ 4904 for (j=jo; j<ncols_o; j++) { 4905 *cam++ = *ba++; bj++; 4906 } 4907 } 4908 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4909 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4910 PetscFunctionReturn(0); 4911 } 4912 4913 #undef __FUNCT__ 4914 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4915 /*@C 4916 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4917 4918 Not Collective 4919 4920 Input Parameters: 4921 + A - the matrix 4922 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4923 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4924 4925 Output Parameter: 4926 . 
A_loc - the local sequential matrix generated 4927 4928 Level: developer 4929 4930 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4931 4932 @*/ 4933 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4934 { 4935 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4936 PetscErrorCode ierr; 4937 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4938 IS isrowa,iscola; 4939 Mat *aloc; 4940 PetscBool match; 4941 4942 PetscFunctionBegin; 4943 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4944 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4945 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4946 if (!row){ 4947 start = A->rmap->rstart; end = A->rmap->rend; 4948 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4949 } else { 4950 isrowa = *row; 4951 } 4952 if (!col){ 4953 start = A->cmap->rstart; 4954 cmap = a->garray; 4955 nzA = a->A->cmap->n; 4956 nzB = a->B->cmap->n; 4957 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4958 ncols = 0; 4959 for (i=0; i<nzB; i++) { 4960 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4961 else break; 4962 } 4963 imark = i; 4964 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4965 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4966 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4967 } else { 4968 iscola = *col; 4969 } 4970 if (scall != MAT_INITIAL_MATRIX){ 4971 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4972 aloc[0] = *A_loc; 4973 } 4974 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4975 *A_loc = aloc[0]; 4976 ierr = PetscFree(aloc);CHKERRQ(ierr); 4977 if (!row){ 4978 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4979 } 4980 if (!col){ 4981 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4982 } 4983 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4984 PetscFunctionReturn(0); 4985 } 4986 4987 #undef __FUNCT__ 4988 #define __FUNCT__ "MatGetBrowsOfAcols" 4989 /*@C 4990 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4991 4992 Collective on Mat 4993 4994 Input Parameters: 4995 + A,B - the matrices in mpiaij format 4996 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4997 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4998 4999 Output Parameter: 5000 + rowb, colb - index sets of rows and columns of B to extract 5001 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 5002 - B_seq - the sequential matrix generated 5003 5004 Level: developer 5005 5006 @*/ 5007 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 5008 { 5009 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5010 PetscErrorCode ierr; 5011 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5012 IS isrowb,iscolb; 5013 Mat *bseq; 5014 5015 PetscFunctionBegin; 5016 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5017 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5018 } 5019 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5020 5021 if (scall == MAT_INITIAL_MATRIX){ 5022 start = A->cmap->rstart; 5023 cmap = a->garray; 5024 nzA = a->A->cmap->n; 5025 nzB = a->B->cmap->n; 5026 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5027 ncols = 0; 5028 for (i=0; i<nzB; i++) { /* row < local row index */ 5029 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5030 else break; 5031 } 5032 imark = i; 5033 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5034 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5035 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5036 *brstart = imark; 5037 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5038 } else { 5039 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5040 isrowb = *rowb; iscolb = *colb; 5041 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5042 bseq[0] = *B_seq; 5043 } 5044 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5045 *B_seq = bseq[0]; 5046 ierr = PetscFree(bseq);CHKERRQ(ierr); 5047 if (!rowb){ 5048 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5049 } else { 5050 *rowb = isrowb; 5051 } 5052 if (!colb){ 5053 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5054 } else { 5055 *colb = iscolb; 5056 } 5057 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5058 PetscFunctionReturn(0); 5059 } 5060 5061 #undef __FUNCT__ 5062 #define __FUNCT__ "MatGetBrowsOfAoCols" 5063 /*@C 5064 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5065 of the OFF-DIAGONAL portion of local A 5066 5067 Collective on Mat 5068 5069 Input Parameters: 5070 + A,B - the matrices in mpiaij format 5071 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5072 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5073 . 
startsj_r - similar to startsj for receives 5074 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5075 5076 Output Parameter: 5077 + B_oth - the sequential matrix generated 5078 5079 Level: developer 5080 5081 @*/ 5082 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5083 { 5084 VecScatter_MPI_General *gen_to,*gen_from; 5085 PetscErrorCode ierr; 5086 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5087 Mat_SeqAIJ *b_oth; 5088 VecScatter ctx=a->Mvctx; 5089 MPI_Comm comm=((PetscObject)ctx)->comm; 5090 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5091 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5092 PetscScalar *rvalues,*svalues; 5093 MatScalar *b_otha,*bufa,*bufA; 5094 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5095 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5096 MPI_Status *sstatus,rstatus; 5097 PetscMPIInt jj; 5098 PetscInt *cols,sbs,rbs; 5099 PetscScalar *vals; 5100 5101 PetscFunctionBegin; 5102 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5103 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5104 } 5105 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5106 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5107 5108 gen_to = (VecScatter_MPI_General*)ctx->todata; 5109 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5110 rvalues = gen_from->values; /* holds the length of receiving row */ 5111 svalues = gen_to->values; /* holds the length of sending row */ 5112 nrecvs = gen_from->n; 5113 nsends = gen_to->n; 5114 5115 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5116 srow = gen_to->indices; /* local row index to be sent */ 5117 sstarts = gen_to->starts; 5118 sprocs = gen_to->procs; 5119 sstatus = gen_to->sstatus; 5120 sbs = gen_to->bs; 5121 rstarts = gen_from->starts; 5122 rprocs = gen_from->procs; 5123 rbs = gen_from->bs; 5124 5125 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5126 if (scall == MAT_INITIAL_MATRIX){ 5127 /* i-array */ 5128 /*---------*/ 5129 /* post receives */ 5130 for (i=0; i<nrecvs; i++){ 5131 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5132 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5133 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5134 } 5135 5136 /* pack the outgoing message */ 5137 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5138 sstartsj[0] = 0; rstartsj[0] = 0; 5139 len = 0; /* total length of j or a array to be sent */ 5140 k = 0; 5141 for (i=0; i<nsends; i++){ 5142 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5143 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5144 for (j=0; j<nrows; j++) { 5145 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5146 for (l=0; l<sbs; l++){ 5147 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5148 rowlen[j*sbs+l] = ncols; 5149 len += ncols; 5150 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5151 } 5152 k++; 5153 } 5154 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5155 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and 
bufa */ 5156 } 5157 /* recvs and sends of i-array are completed */ 5158 i = nrecvs; 5159 while (i--) { 5160 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5161 } 5162 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5163 5164 /* allocate buffers for sending j and a arrays */ 5165 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5166 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5167 5168 /* create i-array of B_oth */ 5169 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5170 b_othi[0] = 0; 5171 len = 0; /* total length of j or a array to be received */ 5172 k = 0; 5173 for (i=0; i<nrecvs; i++){ 5174 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5175 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5176 for (j=0; j<nrows; j++) { 5177 b_othi[k+1] = b_othi[k] + rowlen[j]; 5178 len += rowlen[j]; k++; 5179 } 5180 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5181 } 5182 5183 /* allocate space for j and a arrays of B_oth */ 5184 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5185 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5186 5187 /* j-array */ 5188 /*---------*/ 5189 /* post receives of j-array */ 5190 for (i=0; i<nrecvs; i++){ 5191 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5192 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5193 } 5194 5195 /* pack the outgoing message j-array */ 5196 k = 0; 5197 for (i=0; i<nsends; i++){ 5198 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5199 bufJ = bufj+sstartsj[i]; 5200 for (j=0; j<nrows; j++) { 5201 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5202 for (ll=0; ll<sbs; ll++){ 5203 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5204 for (l=0; l<ncols; l++){ 5205 *bufJ++ = cols[l]; 5206 } 5207 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5208 } 5209 } 5210 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5211 } 5212 5213 /* recvs and sends of j-array are completed */ 5214 i = nrecvs; 5215 while (i--) { 5216 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5217 } 5218 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5219 } else if (scall == MAT_REUSE_MATRIX){ 5220 sstartsj = *startsj; 5221 rstartsj = *startsj_r; 5222 bufa = *bufa_ptr; 5223 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5224 b_otha = b_oth->a; 5225 } else { 5226 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse option"); 5227 } 5228 5229 /* a-array */ 5230 /*---------*/ 5231 /* post receives of a-array */ 5232 for (i=0; i<nrecvs; i++){ 5233 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5234 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5235 } 5236 5237 /* pack the outgoing message a-array */ 5238 k = 0; 5239 for (i=0; i<nsends; i++){ 5240 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5241 bufA = bufa+sstartsj[i]; 5242 for (j=0; j<nrows; j++) { 5243 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5244 for (ll=0; ll<sbs; ll++){ 5245 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5246 for (l=0; l<ncols; l++){ 5247 *bufA++ = vals[l]; 5248 } 5249 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5250 } 5251 } 5252 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5253 } 5254 /* recvs and sends of a-array are completed */ 5255 i = nrecvs; 5256 while (i--) { 5257 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5258 } 5259 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5260 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5261 5262 if (scall == MAT_INITIAL_MATRIX){ 5263 /* put together the new matrix */ 5264 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5265 5266 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5267 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5268 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5269 b_oth->free_a = PETSC_TRUE; 5270 b_oth->free_ij = PETSC_TRUE; 5271 b_oth->nonew = 0; 5272 5273 ierr = PetscFree(bufj);CHKERRQ(ierr); 5274 if (!startsj || !bufa_ptr){ 5275 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5276 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5277 } else { 5278 *startsj = sstartsj; 5279 *startsj_r = rstartsj; 5280 *bufa_ptr = bufa; 5281 } 5282 } 5283 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5284 PetscFunctionReturn(0); 5285 } 5286 5287 #undef __FUNCT__ 5288 #define __FUNCT__ "MatGetCommunicationStructs" 5289 /*@C 5290 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5291 5292 Not Collective 5293 5294 Input Parameters: 5295 . A - The matrix in mpiaij format 5296 5297 Output Parameter: 5298 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5299 . 
colmap - A map from global column index to local index into lvec 5300 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5301 5302 Level: developer 5303 5304 @*/ 5305 #if defined (PETSC_USE_CTABLE) 5306 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5307 #else 5308 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5309 #endif 5310 { 5311 Mat_MPIAIJ *a; 5312 5313 PetscFunctionBegin; 5314 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5315 PetscValidPointer(lvec, 2); 5316 PetscValidPointer(colmap, 3); 5317 PetscValidPointer(multScatter, 4); 5318 a = (Mat_MPIAIJ *) A->data; 5319 if (lvec) *lvec = a->lvec; 5320 if (colmap) *colmap = a->colmap; 5321 if (multScatter) *multScatter = a->Mvctx; 5322 PetscFunctionReturn(0); 5323 } 5324 5325 EXTERN_C_BEGIN 5326 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5327 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5328 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5329 EXTERN_C_END 5330 5331 #undef __FUNCT__ 5332 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5333 /* 5334 Computes (B'*A')' since computing B*A directly is untenable 5335 5336 n p p 5337 ( ) ( ) ( ) 5338 m ( A ) * n ( B ) = m ( C ) 5339 ( ) ( ) ( ) 5340 5341 */ 5342 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5343 { 5344 PetscErrorCode ierr; 5345 Mat At,Bt,Ct; 5346 5347 PetscFunctionBegin; 5348 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5349 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5350 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5351 ierr = MatDestroy(&At);CHKERRQ(ierr); 5352 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5353 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5354 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5355 PetscFunctionReturn(0); 5356 } 5357 5358 #undef __FUNCT__ 5359 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5360 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5361 { 5362 PetscErrorCode ierr; 5363 PetscInt m=A->rmap->n,n=B->cmap->n; 5364 Mat Cmat; 5365 5366 PetscFunctionBegin; 5367 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5368 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5369 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5370 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5371 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5372 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5373 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5374 *C = Cmat; 5375 PetscFunctionReturn(0); 5376 } 5377 5378 /* ----------------------------------------------------------------*/ 5379 #undef __FUNCT__ 5380 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5381 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5382 { 5383 PetscErrorCode ierr; 5384 5385 PetscFunctionBegin; 5386 if (scall == MAT_INITIAL_MATRIX){ 5387 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5388 } 5389 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5390 PetscFunctionReturn(0); 5391 } 5392 5393 EXTERN_C_BEGIN 5394 #if defined(PETSC_HAVE_MUMPS) 5395 extern PetscErrorCode 
MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5396 #endif 5397 #if defined(PETSC_HAVE_PASTIX) 5398 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5399 #endif 5400 #if defined(PETSC_HAVE_SUPERLU_DIST) 5401 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5402 #endif 5403 #if defined(PETSC_HAVE_SPOOLES) 5404 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5405 #endif 5406 EXTERN_C_END 5407 5408 /*MC 5409 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5410 5411 Options Database Keys: 5412 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5413 5414 Level: beginner 5415 5416 .seealso: MatCreateMPIAIJ() 5417 M*/ 5418 5419 EXTERN_C_BEGIN 5420 #undef __FUNCT__ 5421 #define __FUNCT__ "MatCreate_MPIAIJ" 5422 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5423 { 5424 Mat_MPIAIJ *b; 5425 PetscErrorCode ierr; 5426 PetscMPIInt size; 5427 5428 PetscFunctionBegin; 5429 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5430 5431 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5432 B->data = (void*)b; 5433 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5434 B->rmap->bs = 1; 5435 B->assembled = PETSC_FALSE; 5436 5437 B->insertmode = NOT_SET_VALUES; 5438 b->size = size; 5439 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5440 5441 /* build cache for off array entries formed */ 5442 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5443 b->donotstash = PETSC_FALSE; 5444 b->colmap = 0; 5445 b->garray = 0; 5446 b->roworiented = PETSC_TRUE; 5447 5448 /* stuff used for matrix vector multiply */ 5449 b->lvec = PETSC_NULL; 5450 b->Mvctx = PETSC_NULL; 5451 5452 /* stuff for MatGetRow() */ 5453 b->rowindices = 0; 5454 b->rowvalues = 0; 5455 b->getrowactive = PETSC_FALSE; 5456 5457 #if defined(PETSC_HAVE_SPOOLES) 5458 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5459 "MatGetFactor_mpiaij_spooles", 5460 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5461 #endif 5462 #if defined(PETSC_HAVE_MUMPS) 5463 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5464 "MatGetFactor_aij_mumps", 5465 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5466 #endif 5467 #if defined(PETSC_HAVE_PASTIX) 5468 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5469 "MatGetFactor_mpiaij_pastix", 5470 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5471 #endif 5472 #if defined(PETSC_HAVE_SUPERLU_DIST) 5473 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5474 "MatGetFactor_mpiaij_superlu_dist", 5475 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5476 #endif 5477 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5478 "MatStoreValues_MPIAIJ", 5479 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5480 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5481 "MatRetrieveValues_MPIAIJ", 5482 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5483 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5484 "MatGetDiagonalBlock_MPIAIJ", 5485 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5486 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5487 "MatIsTranspose_MPIAIJ", 5488 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5489 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5490 "MatMPIAIJSetPreallocation_MPIAIJ", 5491 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5492 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5493 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5494 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5496 "MatDiagonalScaleLocal_MPIAIJ", 5497 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5499 "MatConvert_MPIAIJ_MPIAIJPERM", 5500 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5501 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5502 "MatConvert_MPIAIJ_MPIAIJCRL", 5503 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5504 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5505 "MatConvert_MPIAIJ_MPISBAIJ", 5506 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5507 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5508 "MatMatMult_MPIDense_MPIAIJ", 5509 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5511 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5512 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5513 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5514 "MatMatMultNumeric_MPIDense_MPIAIJ", 5515 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5516 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5517 PetscFunctionReturn(0); 5518 } 5519 EXTERN_C_END 5520 5521 #undef __FUNCT__ 5522 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5523 /*@ 5524 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5525 and "off-diagonal" part of the matrix in CSR format. 5526 5527 Collective on MPI_Comm 5528 5529 Input Parameters: 5530 + comm - MPI communicator 5531 . m - number of local rows (Cannot be PETSC_DECIDE) 5532 . n - This value should be the same as the local size used in creating the 5533 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5534 calculated if N is given) For square matrices n is almost always m. 5535 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5536 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5537 . i - row indices for "diagonal" portion of matrix 5538 . j - column indices 5539 . a - matrix values 5540 . oi - row indices for "off-diagonal" portion of matrix 5541 . oj - column indices 5542 - oa - matrix values 5543 5544 Output Parameter: 5545 . mat - the matrix 5546 5547 Level: advanced 5548 5549 Notes: 5550 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5551 must free the arrays once the matrix has been destroyed and not before. 5552 5553 The i and j indices are 0 based 5554 5555 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5556 5557 This sets local rows and cannot be used to set off-processor values. 5558 5559 You cannot later use MatSetValues() to change values in this matrix. 
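       Example usage (an illustrative sketch only, with hypothetical sizes and values, and assuming
       that the off-diagonal column indices oj are global column indices): on each of size processes
       create one local row and one local column of a size x size matrix, with 2 on the diagonal and
       a -1 coupling each process to the next one
.vb
       PetscMPIInt rank,size;
       PetscInt    i[2]  = {0,1}, j[1]  = {0};
       PetscInt    oi[2] = {0,0}, oj[1] = {0};
       PetscScalar a[1]  = {2.0}, oa[1] = {-1.0};
       Mat         A;

       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
       ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
       if (rank < size-1) { oi[1] = 1; oj[0] = rank+1; }
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve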
5560 5561 .keywords: matrix, aij, compressed row, sparse, parallel 5562 5563 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5564 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5565 @*/ 5566 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5567 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5568 { 5569 PetscErrorCode ierr; 5570 Mat_MPIAIJ *maij; 5571 5572 PetscFunctionBegin; 5573 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5574 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5575 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5576 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5577 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5578 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5579 maij = (Mat_MPIAIJ*) (*mat)->data; 5580 maij->donotstash = PETSC_TRUE; 5581 (*mat)->preallocated = PETSC_TRUE; 5582 5583 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5584 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5585 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5586 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5587 5588 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5589 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5590 5591 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5592 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5593 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5594 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5595 5596 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5597 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5598 PetscFunctionReturn(0); 5599 } 5600 5601 /* 5602 Special version for direct calls from Fortran 5603 */ 5604 #include <private/fortranimpl.h> 5605 5606 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5607 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5608 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5609 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5610 #endif 5611 5612 /* Change these macros so can be used in void function */ 5613 #undef CHKERRQ 5614 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5615 #undef SETERRQ2 5616 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5617 #undef SETERRQ 5618 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5619 5620 EXTERN_C_BEGIN 5621 #undef __FUNCT__ 5622 #define __FUNCT__ "matsetvaluesmpiaij_" 5623 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5624 { 5625 Mat mat = *mmat; 5626 PetscInt m = *mm, n = *mn; 5627 InsertMode addv = *maddv; 5628 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5629 PetscScalar value; 5630 PetscErrorCode ierr; 5631 5632 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5633 if (mat->insertmode == NOT_SET_VALUES) { 5634 mat->insertmode = addv; 5635 } 5636 #if defined(PETSC_USE_DEBUG) 5637 else if (mat->insertmode != addv) { 5638 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5639 } 5640 #endif 5641 { 5642 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5643 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5644 PetscBool roworiented = aij->roworiented; 5645 5646 /* Some Variables required in the macro */ 5647 Mat A = aij->A; 5648 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5649 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5650 MatScalar *aa = a->a; 5651 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5652 Mat B = aij->B; 5653 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5654 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5655 MatScalar *ba = b->a; 5656 5657 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5658 PetscInt nonew = a->nonew; 5659 MatScalar *ap1,*ap2; 5660 5661 PetscFunctionBegin; 5662 for (i=0; i<m; i++) { 5663 if (im[i] < 0) continue; 5664 #if defined(PETSC_USE_DEBUG) 5665 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5666 #endif 5667 if (im[i] >= rstart && im[i] < rend) { 5668 row = im[i] - rstart; 5669 lastcol1 = -1; 5670 rp1 = aj + ai[row]; 5671 ap1 = aa + ai[row]; 5672 rmax1 = aimax[row]; 5673 nrow1 = ailen[row]; 5674 low1 = 0; 5675 high1 = nrow1; 5676 lastcol2 = -1; 5677 rp2 = bj + bi[row]; 5678 ap2 = ba + bi[row]; 5679 rmax2 = bimax[row]; 5680 nrow2 = bilen[row]; 5681 low2 = 0; 5682 high2 = nrow2; 5683 5684 for (j=0; j<n; j++) { 5685 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5686 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5687 if (in[j] >= cstart && in[j] < cend){ 5688 col = in[j] - cstart; 5689 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5690 } else if (in[j] < 0) continue; 5691 #if defined(PETSC_USE_DEBUG) 5692 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5693 #endif 5694 else { 5695 if (mat->was_assembled) { 5696 if (!aij->colmap) { 5697 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5698 } 5699 #if defined (PETSC_USE_CTABLE) 5700 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5701 col--; 5702 #else 5703 col = aij->colmap[in[j]] - 1; 5704 #endif 5705 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5706 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5707 col = in[j]; 5708 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5709 B = aij->B; 5710 b = (Mat_SeqAIJ*)B->data; 5711 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5712 rp2 = bj + bi[row]; 5713 ap2 = ba + bi[row]; 5714 rmax2 = bimax[row]; 5715 nrow2 = bilen[row]; 5716 low2 = 0; 5717 high2 = nrow2; 5718 bm = aij->B->rmap->n; 5719 ba = b->a; 5720 } 5721 } else col = in[j]; 5722 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5723 } 5724 } 5725 } else { 5726 if (!aij->donotstash) { 5727 if (roworiented) { 5728 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5729 } else { 5730 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5731 } 5732 } 5733 } 5734 }} 5735 PetscFunctionReturnVoid(); 5736 } 5737 EXTERN_C_END 5738 5739
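
/*
   Illustrative usage sketch (not part of this file's compiled code; A and B are hypothetical,
   already-assembled matrices with A->cmap->n == B->rmap->n): the MatMatMult_MPIDense_MPIAIJ kernel
   registered in MatCreate_MPIAIJ() above is intended to be reached through an ordinary MatMatMult()
   call with an MPIDENSE left factor and an MPIAIJ right factor, for example

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   The first call creates C (symbolic then numeric phase); the second reuses the already created C
   and recomputes only the numeric product, using the transpose-based approach noted above
   MatMatMultNumeric_MPIDense_MPIAIJ().
*/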