1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when 18 enough of them exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j<nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for (j=0; j<na; j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray =
aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
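Process 0 holds the entire sequential matrix gmat; each process is sent its block of m rows (row lengths first, then column indices, then numerical values), computes its diagonal/off-diagonal preallocation from them, and fills the result with MatSetValues(). With MAT_REUSE_MATRIX only the numerical values are shipped and copied into the existing local blocks.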
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine number diagonal and off-diagonal counts */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine number diagonal and off-diagonal counts */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for 
(j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. 
When PETSC_USE_CTABLE is used this is scalable at 330 a slightly higher hash table cost; without it it is not scalable (each processor 331 has an order N integer array but is fast to acess. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 336 { 337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 338 PetscErrorCode ierr; 339 PetscInt n = aij->B->cmap->n,i; 340 341 PetscFunctionBegin; 342 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 345 for (i=0; i<n; i++){ 346 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 350 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 352 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 353 #endif 354 PetscFunctionReturn(0); 355 } 356 357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 358 { \ 359 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 360 lastcol1 = col;\ 361 while (high1-low1 > 5) { \ 362 t = (low1+high1)/2; \ 363 if (rp1[t] > col) high1 = t; \ 364 else low1 = t; \ 365 } \ 366 for (_i=low1; _i<high1; _i++) { \ 367 if (rp1[_i] > col) break; \ 368 if (rp1[_i] == col) { \ 369 if (addv == ADD_VALUES) ap1[_i] += value; \ 370 else ap1[_i] = value; \ 371 goto a_noinsert; \ 372 } \ 373 } \ 374 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 375 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 377 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 378 N = nrow1++ - 1; a->nz++; high1++; \ 379 /* shift up all the later entries in this row */ \ 380 for (ii=N; ii>=_i; ii--) { \ 381 rp1[ii+1] = rp1[ii]; \ 382 ap1[ii+1] = ap1[ii]; \ 383 } \ 384 rp1[_i] = col; \ 385 ap1[_i] = value; \ 386 a_noinsert: ; \ 387 ailen[row] = nrow1; \ 388 } 389 390 391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 392 { \ 393 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 394 lastcol2 = col;\ 395 while (high2-low2 > 5) { \ 396 t = (low2+high2)/2; \ 397 if (rp2[t] > col) high2 = t; \ 398 else low2 = t; \ 399 } \ 400 for (_i=low2; _i<high2; _i++) { \ 401 if (rp2[_i] > col) break; \ 402 if (rp2[_i] == col) { \ 403 if (addv == ADD_VALUES) ap2[_i] += value; \ 404 else ap2[_i] = value; \ 405 goto b_noinsert; \ 406 } \ 407 } \ 408 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 410 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 411 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 412 N = nrow2++ - 1; b->nz++; high2++; \ 413 /* shift up all the later entries in this row */ \ 414 for (ii=N; ii>=_i; ii--) { \ 415 rp2[ii+1] = rp2[ii]; \ 416 ap2[ii+1] = ap2[ii]; \ 417 } \ 418 rp2[_i] = col; \ 419 ap2[_i] = value; \ 420 b_noinsert: ; \ 421 bilen[row] = nrow2; \ 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ 
"MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 
ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 
PetscFunctionBegin; 616 if (aij->donotstash || mat->nooffprocentries) { 617 PetscFunctionReturn(0); 618 } 619 620 /* make sure all processors are either in INSERTMODE or ADDMODE */ 621 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 622 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added"); 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 626 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 627 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 634 { 635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 636 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 637 PetscErrorCode ierr; 638 PetscMPIInt n; 639 PetscInt i,j,rstart,ncols,flg; 640 PetscInt *row,*col; 641 PetscBool other_disassembled; 642 PetscScalar *val; 643 InsertMode addv = mat->insertmode; 644 645 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 646 PetscFunctionBegin; 647 if (!aij->donotstash && !mat->nooffprocentries) { 648 while (1) { 649 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 650 if (!flg) break; 651 652 for (i=0; i<n;) { 653 /* Now identify the consecutive vals belonging to the same row */ 654 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 655 if (j < n) ncols = j-i; 656 else ncols = n-i; 657 /* Now assemble all these values with a single function call */ 658 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 659 i = j; 660 } 661 } 662 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 663 } 664 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 665 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 666 667 /* determine if any processor has disassembled; if so we must 668 also disassemble ourselves, in order that we may reassemble.
*/ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } 
else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 
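For the rectangular (or zero diagonal) case we instead fall back to one MatSetValues() call per zeroed row to place the diagonal entry, which is why that path refuses to run when new nonzero locations have been disallowed.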
844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = 
aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* 
send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 
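/*
   Note on the multiply kernels above: MatMult_MPIAIJ, MatMultAdd_MPIAIJ, MatMultTranspose_MPIAIJ and
   MatMultTransposeAdd_MPIAIJ all overlap the VecScatter communication on a->Mvctx with the purely local
   product on the diagonal block a->A; the off-diagonal block a->B supplies the contribution that involves
   off-process entries (a forward scatter into a->lvec for the non-transposed products, a reverse ADD_VALUES
   scatter of a->lvec for the transposed ones). None of this is visible from user code; a minimal usage
   sketch (the vector names x,y and the fill value 1.0 are illustrative only, error handling abbreviated):

       Vec x,y;
       ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
       ierr = VecSet(x,1.0);CHKERRQ(ierr);
       ierr = MatMult(A,x,y);CHKERRQ(ierr);
       ierr = VecDestroy(&x);CHKERRQ(ierr);
       ierr = VecDestroy(&y);CHKERRQ(ierr);
*/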
1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int 
fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 1303 PetscFunctionBegin; 1304 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1305 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1306 nz = A->nz + B->nz; 1307 if (!rank) { 1308 header[0] = MAT_FILE_CLASSID; 1309 header[1] = mat->rmap->N; 1310 header[2] = mat->cmap->N; 1311 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1312 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1313 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1314 /* get largest number of rows any processor has */ 1315 rlen = mat->rmap->n; 1316 range = mat->rmap->range; 1317 for (i=1; i<size; i++) { 1318 rlen = PetscMax(rlen,range[i+1] - range[i]); 1319 } 1320 } else { 1321 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1322 rlen = mat->rmap->n; 1323 } 1324 1325 /* load up the local row counts */ 1326 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1327 for (i=0; i<mat->rmap->n; i++) { 1328 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1329 } 1330 1331 /* store the row lengths to the file */ 1332 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1333 if (!rank) { 1334 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 for (i=1; i<size; i++) { 1336 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1337 rlen = range[i+1] - range[i]; 1338 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1339 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1340 } 1341 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1344 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1345 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1346 } 1347 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1348 1349 /* load up the local column indices */ 1350 nzmax = nz; /* )th processor needs space a largest processor needs */ 1351 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1352 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1353 cnt = 0; 1354 for (i=0; i<mat->rmap->n; i++) { 1355 for (j=B->i[i]; j<B->i[i+1]; j++) { 1356 if ( (col = garray[B->j[j]]) > cstart) break; 1357 column_indices[cnt++] = col; 1358 } 1359 for (k=A->i[i]; k<A->i[i+1]; k++) { 1360 column_indices[cnt++] = A->j[k] + cstart; 1361 } 1362 for (; j<B->i[i+1]; j++) { 1363 column_indices[cnt++] = garray[B->j[j]]; 1364 } 1365 } 1366 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1367 1368 /* store the column indices to the file */ 1369 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1370 if (!rank) { 1371 MPI_Status status; 1372 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1373 for (i=1; i<size; i++) { 1374 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1375 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1376 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1377 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1378 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1379 } 1380 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1381 } else { 1382 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1383 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1384 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1388 1389 /* load up the local column values */ 1390 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1391 cnt = 0; 1392 for (i=0; i<mat->rmap->n; i++) { 1393 for (j=B->i[i]; j<B->i[i+1]; j++) { 1394 if ( garray[B->j[j]] > cstart) break; 1395 column_values[cnt++] = B->a[j]; 1396 } 1397 for (k=A->i[i]; k<A->i[i+1]; k++) { 1398 column_values[cnt++] = A->a[k]; 1399 } 1400 for (; j<B->i[i+1]; j++) { 1401 column_values[cnt++] = B->a[j]; 1402 } 1403 } 1404 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1405 1406 /* store the column values to the file */ 1407 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1408 if (!rank) { 1409 MPI_Status status; 1410 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1411 for (i=1; i<size; i++) { 1412 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1413 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1414 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1415 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1416 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1417 } 1418 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1419 } else { 1420 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1421 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1422 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1424 } 1425 ierr = PetscFree(column_values);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1432 { 1433 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1434 PetscErrorCode ierr; 1435 PetscMPIInt rank = aij->rank,size = aij->size; 1436 PetscBool isdraw,iascii,isbinary; 1437 PetscViewer sviewer; 1438 PetscViewerFormat format; 1439 1440 PetscFunctionBegin; 1441 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1442 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1443 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1444 if (iascii) { 1445 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1446 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1447 MatInfo info; 1448 PetscBool inodes; 1449 1450 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1451 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1452 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1454 if (!inodes) { 1455 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1456 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1457 } else { 1458 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1459 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1460 } 1461 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1463 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1465 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1468 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1471 PetscInt inodecount,inodelimit,*inodes; 1472 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1473 if (inodes) { 1474 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1475 } else { 1476 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1477 } 1478 PetscFunctionReturn(0); 1479 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1480 PetscFunctionReturn(0); 1481 } 1482 } else if (isbinary) { 1483 if (size == 1) { 1484 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1485 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1486 } else { 1487 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1488 } 1489 PetscFunctionReturn(0); 1490 } else if (isdraw) { 1491 PetscDraw draw; 1492 PetscBool isnull; 1493 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1494 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1495 } 1496 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 /* assemble the entire matrix onto first processor. 
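Note: this places the whole matrix on process 0, so it is intended only for small problems viewed with the ASCII, draw, or socket viewers (see the -mat_ascii_output_large guard below).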
*/ 1502 Mat A; 1503 Mat_SeqAIJ *Aloc; 1504 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1505 MatScalar *a; 1506 1507 if (mat->rmap->N > 1024) { 1508 PetscBool flg = PETSC_FALSE; 1509 1510 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1511 if (!flg) { 1512 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1513 } 1514 } 1515 1516 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1517 if (!rank) { 1518 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1519 } else { 1520 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1521 } 1522 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1523 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1524 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1525 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1526 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1527 1528 /* copy over the A part */ 1529 Aloc = (Mat_SeqAIJ*)aij->A->data; 1530 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1531 row = mat->rmap->rstart; 1532 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1533 for (i=0; i<m; i++) { 1534 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1535 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1536 } 1537 aj = Aloc->j; 1538 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1539 1540 /* copy over the B part */ 1541 Aloc = (Mat_SeqAIJ*)aij->B->data; 1542 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1545 ct = cols; 1546 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1547 for (i=0; i<m; i++) { 1548 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1549 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1550 } 1551 ierr = PetscFree(ct);CHKERRQ(ierr); 1552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1554 /* 1555 Everyone has to call to draw the matrix since the graphics waits are 1556 synchronized across all processors that share the PetscDraw object 1557 */ 1558 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1559 if (!rank) { 1560 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1561 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1562 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1563 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1564 } 1565 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 ierr = MatDestroy(&A);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatView_MPIAIJ" 1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1574 { 1575 PetscErrorCode ierr; 1576 PetscBool iascii,isdraw,issocket,isbinary; 1577 1578 PetscFunctionBegin; 1579 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1580 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1581 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1582 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1583 if (iascii || isdraw || isbinary || issocket) { 1584 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1585 } else { 1586 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatSOR_MPIAIJ" 1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1594 { 1595 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1596 PetscErrorCode ierr; 1597 Vec bb1 = 0; 1598 PetscBool hasop; 1599 1600 PetscFunctionBegin; 1601 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1602 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1603 } 1604 1605 if (flag == SOR_APPLY_UPPER) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1611 if (flag & SOR_ZERO_INITIAL_GUESS) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 its--; 1614 } 1615 1616 while (its--) { 1617 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 1620 /* update rhs: bb1 = bb - B*x */ 1621 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1622 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1626 } 1627 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1628 if (flag & SOR_ZERO_INITIAL_GUESS) { 1629 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1630 its--; 1631 } 1632 while (its--) { 1633 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 1636 /* update rhs: bb1 = bb - B*x */ 1637 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1638 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1639 1640 /* local sweep */ 1641 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1642 } 1643 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1644 if (flag & SOR_ZERO_INITIAL_GUESS) { 1645 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1646 its--; 1647 } 1648 while (its--) { 1649 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 1652 /* update rhs: bb1 = bb - B*x */ 1653 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1654 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1655 1656 /* local sweep */ 1657 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1658 } 1659 } else if (flag & SOR_EISENSTAT) { 1660 Vec xx1; 1661 1662 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1663 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1664 1665 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 if (!mat->diag) { 1668 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1669 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1670 } 1671 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1672 if (hasop) { 1673 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1674 } else { 1675 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1676 } 1677 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1678 1679 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1680 1681 /* local sweep */ 1682 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1683 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1684 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1685 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1686 1687 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatPermute_MPIAIJ" 1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1694 { 1695 MPI_Comm comm; 1696 PetscInt first,local_rowsize,local_colsize; 1697 const PetscInt *rows; 1698 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1703 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1704 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1709 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1710 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1711 /* get the local target indices */ 1712 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1713 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1714 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1716 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1717 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1718 /* the column permutation is so much easier; 1719 make a local version of 'colp' and invert it */ 1720 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1721 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1722 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1724 /* now we just get the submatrix */ 1725 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1726 /* clean up */ 1727 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1728 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1735 { 1736 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1737 Mat A = mat->A,B = 
mat->B; 1738 PetscErrorCode ierr; 1739 PetscReal isend[5],irecv[5]; 1740 1741 PetscFunctionBegin; 1742 info->block_size = 1.0; 1743 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1744 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1745 isend[3] = info->memory; isend[4] = info->mallocs; 1746 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1747 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1748 isend[3] += info->memory; isend[4] += info->mallocs; 1749 if (flag == MAT_LOCAL) { 1750 info->nz_used = isend[0]; 1751 info->nz_allocated = isend[1]; 1752 info->nz_unneeded = isend[2]; 1753 info->memory = isend[3]; 1754 info->mallocs = isend[4]; 1755 } else if (flag == MAT_GLOBAL_MAX) { 1756 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1757 info->nz_used = irecv[0]; 1758 info->nz_allocated = irecv[1]; 1759 info->nz_unneeded = irecv[2]; 1760 info->memory = irecv[3]; 1761 info->mallocs = irecv[4]; 1762 } else if (flag == MAT_GLOBAL_SUM) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } 1770 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1771 info->fill_ratio_needed = 0; 1772 info->factor_mallocs = 0; 1773 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetOption_MPIAIJ" 1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1780 { 1781 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1782 PetscErrorCode ierr; 1783 1784 PetscFunctionBegin; 1785 switch (op) { 1786 case MAT_NEW_NONZERO_LOCATIONS: 1787 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1788 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1789 case MAT_KEEP_NONZERO_PATTERN: 1790 case MAT_NEW_NONZERO_LOCATION_ERR: 1791 case MAT_USE_INODES: 1792 case MAT_IGNORE_ZERO_ENTRIES: 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1795 break; 1796 case MAT_ROW_ORIENTED: 1797 a->roworiented = flg; 1798 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1799 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1800 break; 1801 case MAT_NEW_DIAGONALS: 1802 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1803 break; 1804 case MAT_IGNORE_OFF_PROC_ENTRIES: 1805 a->donotstash = flg; 1806 break; 1807 case MAT_SPD: 1808 A->spd_set = PETSC_TRUE; 1809 A->spd = flg; 1810 if (flg) { 1811 A->symmetric = PETSC_TRUE; 1812 A->structurally_symmetric = PETSC_TRUE; 1813 A->symmetric_set = PETSC_TRUE; 1814 A->structurally_symmetric_set = PETSC_TRUE; 1815 } 1816 break; 1817 case MAT_SYMMETRIC: 1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_STRUCTURALLY_SYMMETRIC: 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 break; 1823 case MAT_HERMITIAN: 1824 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1825 break; 1826 case MAT_SYMMETRY_ETERNAL: 1827 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1828 break; 1829 default: 1830 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1831 } 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatGetRow_MPIAIJ" 1837 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1838 { 1839 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1840 
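   /*
      Illustrative sketch (not part of this routine): MatGetRow_MPIAIJ() backs the public
      MatGetRow()/MatRestoreRow() pair. Callers may only request locally owned rows and must
      restore each row before asking for the next one (see the "Already active" guard below).
      Assuming an assembled MPIAIJ matrix named mat, a typical traversal is

         PetscInt          row,rstart,rend,ncols;
         const PetscInt    *cols;
         const PetscScalar *vals;
         ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
         for (row=rstart; row<rend; row++) {
           ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
           ... use ncols, the global column indices in cols[], and the values in vals[] ...
           ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         }
   */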
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1841 PetscErrorCode ierr; 1842 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1843 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1844 PetscInt *cmap,*idx_p; 1845 1846 PetscFunctionBegin; 1847 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1848 mat->getrowactive = PETSC_TRUE; 1849 1850 if (!mat->rowvalues && (idx || v)) { 1851 /* 1852 allocate enough space to hold information from the longest row. 1853 */ 1854 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1855 PetscInt max = 1,tmp; 1856 for (i=0; i<matin->rmap->n; i++) { 1857 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1858 if (max < tmp) { max = tmp; } 1859 } 1860 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1861 } 1862 1863 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1864 lrow = row - rstart; 1865 1866 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1867 if (!v) {pvA = 0; pvB = 0;} 1868 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1869 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1870 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1871 nztot = nzA + nzB; 1872 1873 cmap = mat->garray; 1874 if (v || idx) { 1875 if (nztot) { 1876 /* Sort by increasing column numbers, assuming A and B already sorted */ 1877 PetscInt imark = -1; 1878 if (v) { 1879 *v = v_p = mat->rowvalues; 1880 for (i=0; i<nzB; i++) { 1881 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1882 else break; 1883 } 1884 imark = i; 1885 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1886 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1887 } 1888 if (idx) { 1889 *idx = idx_p = mat->rowindices; 1890 if (imark > -1) { 1891 for (i=0; i<imark; i++) { 1892 idx_p[i] = cmap[cworkB[i]]; 1893 } 1894 } else { 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1897 else break; 1898 } 1899 imark = i; 1900 } 1901 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1902 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1903 } 1904 } else { 1905 if (idx) *idx = 0; 1906 if (v) *v = 0; 1907 } 1908 } 1909 *nz = nztot; 1910 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1911 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1917 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1918 { 1919 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1920 1921 PetscFunctionBegin; 1922 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1923 aij->getrowactive = PETSC_FALSE; 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatNorm_MPIAIJ" 1929 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1930 { 1931 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1932 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1933 PetscErrorCode ierr; 1934 PetscInt i,j,cstart = mat->cmap->rstart; 1935 PetscReal sum = 0.0; 1936 MatScalar *v; 1937 1938 PetscFunctionBegin; 1939 if (aij->size == 1) { 1940 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1941 } else { 1942 if (type == 
NORM_FROBENIUS) { 1943 v = amat->a; 1944 for (i=0; i<amat->nz; i++) { 1945 #if defined(PETSC_USE_COMPLEX) 1946 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1947 #else 1948 sum += (*v)*(*v); v++; 1949 #endif 1950 } 1951 v = bmat->a; 1952 for (i=0; i<bmat->nz; i++) { 1953 #if defined(PETSC_USE_COMPLEX) 1954 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1955 #else 1956 sum += (*v)*(*v); v++; 1957 #endif 1958 } 1959 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1960 *norm = PetscSqrtReal(*norm); 1961 } else if (type == NORM_1) { /* max column norm */ 1962 PetscReal *tmp,*tmp2; 1963 PetscInt *jj,*garray = aij->garray; 1964 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1965 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1966 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1967 *norm = 0.0; 1968 v = amat->a; jj = amat->j; 1969 for (j=0; j<amat->nz; j++) { 1970 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1971 } 1972 v = bmat->a; jj = bmat->j; 1973 for (j=0; j<bmat->nz; j++) { 1974 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1975 } 1976 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1977 for (j=0; j<mat->cmap->N; j++) { 1978 if (tmp2[j] > *norm) *norm = tmp2[j]; 1979 } 1980 ierr = PetscFree(tmp);CHKERRQ(ierr); 1981 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1982 } else if (type == NORM_INFINITY) { /* max row norm */ 1983 PetscReal ntemp = 0.0; 1984 for (j=0; j<aij->A->rmap->n; j++) { 1985 v = amat->a + amat->i[j]; 1986 sum = 0.0; 1987 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1988 sum += PetscAbsScalar(*v); v++; 1989 } 1990 v = bmat->a + bmat->i[j]; 1991 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1992 sum += PetscAbsScalar(*v); v++; 1993 } 1994 if (sum > ntemp) ntemp = sum; 1995 } 1996 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1997 } else { 1998 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1999 } 2000 } 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatTranspose_MPIAIJ" 2006 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2007 { 2008 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2009 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2010 PetscErrorCode ierr; 2011 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2012 PetscInt cstart=A->cmap->rstart,ncol; 2013 Mat B; 2014 MatScalar *array; 2015 2016 PetscFunctionBegin; 2017 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2018 2019 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2020 ai = Aloc->i; aj = Aloc->j; 2021 bi = Bloc->i; bj = Bloc->j; 2022 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2023 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2024 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2025 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2026 for (i=0; i<ai[ma]; i++){ 2027 d_nnz[aj[i]] ++; 2028 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2029 } 2030 2031 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2032 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2033 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); 
CHKERRQ(ierr); 2034 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2035 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2036 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2037 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2038 } else { 2039 B = *matout; 2040 } 2041 2042 /* copy over the A part */ 2043 array = Aloc->a; 2044 row = A->rmap->rstart; 2045 for (i=0; i<ma; i++) { 2046 ncol = ai[i+1]-ai[i]; 2047 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2048 row++; array += ncol; aj += ncol; 2049 } 2050 aj = Aloc->j; 2051 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2052 2053 /* copy over the B part */ 2054 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2055 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2056 array = Bloc->a; 2057 row = A->rmap->rstart; 2058 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2059 cols_tmp = cols; 2060 for (i=0; i<mb; i++) { 2061 ncol = bi[i+1]-bi[i]; 2062 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2063 row++; array += ncol; cols_tmp += ncol; 2064 } 2065 ierr = PetscFree(cols);CHKERRQ(ierr); 2066 2067 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2070 *matout = B; 2071 } else { 2072 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2079 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2080 { 2081 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2082 Mat a = aij->A,b = aij->B; 2083 PetscErrorCode ierr; 2084 PetscInt s1,s2,s3; 2085 2086 PetscFunctionBegin; 2087 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2088 if (rr) { 2089 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2090 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2091 /* Overlap communication with computation. 
*/ 2092 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2093 } 2094 if (ll) { 2095 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2096 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2097 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2098 } 2099 /* scale the diagonal block */ 2100 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2101 2102 if (rr) { 2103 /* Do a scatter end and then right scale the off-diagonal block */ 2104 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2106 } 2107 2108 PetscFunctionReturn(0); 2109 } 2110 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2113 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2114 { 2115 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatEqual_MPIAIJ" 2125 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2126 { 2127 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2128 Mat a,b,c,d; 2129 PetscBool flg; 2130 PetscErrorCode ierr; 2131 2132 PetscFunctionBegin; 2133 a = matA->A; b = matA->B; 2134 c = matB->A; d = matB->B; 2135 2136 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2137 if (flg) { 2138 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2139 } 2140 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 #undef __FUNCT__ 2145 #define __FUNCT__ "MatCopy_MPIAIJ" 2146 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2147 { 2148 PetscErrorCode ierr; 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2150 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2151 2152 PetscFunctionBegin; 2153 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2154 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2155 /* because of the column compression in the off-processor part of the matrix a->B, 2156 the number of columns in a->B and b->B may be different, hence we cannot call 2157 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2158 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2159 then copying the submatrices */ 2160 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2161 } else { 2162 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2163 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatSetUp_MPIAIJ" 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2181 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2182 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2183 { 2184 PetscInt i,m=Y->rmap->N; 2185 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2186 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2187 const PetscInt *xi = x->i,*yi = y->i; 2188 2189 PetscFunctionBegin; 2190 /* Set the number of nonzeros in the new matrix */ 2191 for(i=0; i<m; i++) { 2192 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2193 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2194 nnz[i] = 0; 2195 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2196 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2197 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2198 nnz[i]++; 2199 } 2200 for (; k<nzy; k++) nnz[i]++; 2201 } 2202 PetscFunctionReturn(0); 2203 } 2204 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatAXPY_MPIAIJ" 2207 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2208 { 2209 PetscErrorCode ierr; 2210 PetscInt i; 2211 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2212 PetscBLASInt bnz,one=1; 2213 Mat_SeqAIJ *x,*y; 2214 2215 PetscFunctionBegin; 2216 if (str == SAME_NONZERO_PATTERN) { 2217 PetscScalar alpha = a; 2218 x = (Mat_SeqAIJ *)xx->A->data; 2219 y = (Mat_SeqAIJ *)yy->A->data; 2220 bnz = PetscBLASIntCast(x->nz); 2221 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2222 x = (Mat_SeqAIJ *)xx->B->data; 2223 y = (Mat_SeqAIJ *)yy->B->data; 2224 bnz = PetscBLASIntCast(x->nz); 2225 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2226 } else if (str == SUBSET_NONZERO_PATTERN) { 2227 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2228 2229 x = (Mat_SeqAIJ *)xx->B->data; 2230 y = (Mat_SeqAIJ *)yy->B->data; 2231 if (y->xtoy && y->XtoY != xx->B) { 2232 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2233 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2234 } 2235 if (!y->xtoy) { /* get xtoy */ 2236 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2237 y->XtoY = xx->B; 2238 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2239 } 2240 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2241 } else { 2242 Mat B; 2243 PetscInt *nnz_d,*nnz_o; 2244 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2245 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2246 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2247 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2248 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2249 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2250 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2251 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2253 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2254 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2255 ierr = MatHeaderReplace(Y,B); 2256 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatConjugate_MPIAIJ" 2266 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2267 { 2268 #if defined(PETSC_USE_COMPLEX) 2269 PetscErrorCode ierr; 2270 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2271 2272 PetscFunctionBegin; 2273 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2274 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2275 #else 2276 PetscFunctionBegin; 2277 #endif 2278 PetscFunctionReturn(0); 2279 } 2280 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatRealPart_MPIAIJ" 2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2284 { 2285 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2286 PetscErrorCode ierr; 2287 2288 PetscFunctionBegin; 2289 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2290 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2296 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2297 { 2298 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2299 PetscErrorCode ierr; 2300 2301 PetscFunctionBegin; 2302 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2303 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #ifdef PETSC_HAVE_PBGL 2308 2309 #include <boost/parallel/mpi/bsp_process_group.hpp> 2310 #include <boost/graph/distributed/ilu_default_graph.hpp> 2311 #include <boost/graph/distributed/ilu_0_block.hpp> 2312 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2313 #include <boost/graph/distributed/petsc/interface.hpp> 2314 #include <boost/multi_array.hpp> 2315 #include <boost/parallel/distributed_property_map->hpp> 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2319 /* 2320 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2321 */ 2322 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2323 { 2324 namespace petsc = boost::distributed::petsc; 2325 2326 namespace graph_dist = boost::graph::distributed; 2327 using boost::graph::distributed::ilu_default::process_group_type; 2328 using boost::graph::ilu_permuted; 2329 2330 PetscBool row_identity, col_identity; 2331 PetscContainer c; 2332 PetscInt m, n, M, N; 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2337 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2338 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2339 if (!row_identity || !col_identity) { 2340 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2341 } 2342 2343 process_group_type pg; 2344 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2345 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2346 lgraph_type& level_graph = *lgraph_p; 2347 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2348 2349 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2350 ilu_permuted(level_graph); 2351 2352 /* put together the new matrix */ 2353 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2354 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2355 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2356 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2357 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2358 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2359 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 2362 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2363 ierr = PetscContainerSetPointer(c, lgraph_p); 2364 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2365 ierr = PetscContainerDestroy(&c); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2371 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2372 { 2373 PetscFunctionBegin; 2374 PetscFunctionReturn(0); 2375 } 2376 2377 #undef __FUNCT__ 2378 #define __FUNCT__ "MatSolve_MPIAIJ" 2379 /* 2380 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2381 */ 2382 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2383 { 2384 namespace graph_dist = boost::graph::distributed; 2385 2386 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2387 lgraph_type* lgraph_p; 2388 PetscContainer c; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2393 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2394 ierr = VecCopy(b, x);CHKERRQ(ierr); 2395 2396 PetscScalar* array_x; 2397 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2398 PetscInt sx; 2399 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2400 2401 PetscScalar* array_b; 2402 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2403 PetscInt sb; 2404 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2405 2406 lgraph_type& level_graph = *lgraph_p; 2407 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2408 2409 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2410 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2411 ref_x(array_x, boost::extents[num_vertices(graph)]); 2412 2413 typedef boost::iterator_property_map<array_ref_type::iterator, 2414 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2415 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2416 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2417 2418 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2419 2420 PetscFunctionReturn(0); 2421 } 2422 #endif 2423 2424 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2425 PetscInt nzlocal,nsends,nrecvs; 2426 PetscMPIInt *send_rank,*recv_rank; 2427 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2428 PetscScalar *sbuf_a,**rbuf_a; 2429 PetscErrorCode (*Destroy)(Mat); 2430 } Mat_Redundant; 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2434 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2435 { 2436 PetscErrorCode ierr; 2437 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2438 PetscInt i; 2439 2440 PetscFunctionBegin; 2441 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2442 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2443 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2444 for (i=0; i<redund->nrecvs; i++){ 2445 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2446 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2447 } 2448 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2449 ierr = PetscFree(redund);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatDestroy_MatRedundant" 2455 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2456 { 2457 PetscErrorCode ierr; 2458 PetscContainer container; 2459 Mat_Redundant *redund=PETSC_NULL; 2460 2461 PetscFunctionBegin; 2462 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2463 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2464 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2465 A->ops->destroy = redund->Destroy; 2466 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2467 if (A->ops->destroy) { 2468 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2469 } 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2475 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2476 { 2477 PetscMPIInt rank,size; 2478 MPI_Comm comm=((PetscObject)mat)->comm; 2479 PetscErrorCode ierr; 2480 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2481 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2482 PetscInt *rowrange=mat->rmap->range; 2483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2484 Mat A=aij->A,B=aij->B,C=*matredundant; 2485 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2486 PetscScalar *sbuf_a; 2487 PetscInt nzlocal=a->nz+b->nz; 2488 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2489 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2490 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2491 MatScalar *aworkA,*aworkB; 2492 PetscScalar *vals; 2493 PetscMPIInt tag1,tag2,tag3,imdex; 2494 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2495 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2496 MPI_Status recv_status,*send_status; 2497 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2498 PetscInt **rbuf_j=PETSC_NULL; 2499 PetscScalar **rbuf_a=PETSC_NULL; 2500 Mat_Redundant *redund=PETSC_NULL; 2501 PetscContainer container; 2502 2503 PetscFunctionBegin; 2504 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2505 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2506 2507 if (reuse == MAT_REUSE_MATRIX) { 2508 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2509 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2510 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2511 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2512 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2513 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2514 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2515 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2516 2517 nsends = redund->nsends; 2518 nrecvs = redund->nrecvs; 2519 send_rank = redund->send_rank; 2520 recv_rank = redund->recv_rank; 2521 sbuf_nz = redund->sbuf_nz; 2522 rbuf_nz = redund->rbuf_nz; 2523 sbuf_j = redund->sbuf_j; 2524 sbuf_a = redund->sbuf_a; 2525 rbuf_j = redund->rbuf_j; 2526 rbuf_a = redund->rbuf_a; 2527 } 2528 2529 if (reuse == MAT_INITIAL_MATRIX){ 2530 PetscMPIInt subrank,subsize; 2531 PetscInt nleftover,np_subcomm; 2532 /* get the destination processors' id send_rank, nsends and nrecvs */ 2533 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2534 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2535 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2536 np_subcomm = size/nsubcomm; 2537 nleftover = size - nsubcomm*np_subcomm; 2538 nsends = 0; nrecvs = 0; 2539 for (i=0; i<size; i++){ /* i=rank*/ 2540 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2541 send_rank[nsends] = i; nsends++; 2542 recv_rank[nrecvs++] = i; 2543 } 2544 } 2545 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2546 i = size-nleftover-1; 2547 j = 0; 2548 while (j < nsubcomm - nleftover){ 2549 send_rank[nsends++] = i; 2550 i--; j++; 2551 } 2552 } 2553 2554 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2555 for (i=0; i<nleftover; i++){ 2556 recv_rank[nrecvs++] = size-nleftover+i; 2557 } 2558 } 2559 2560 /* allocate sbuf_j, sbuf_a */ 2561 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2562 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2563 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2564 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2565 2566 /* copy mat's local entries into the buffers */ 2567 if (reuse == MAT_INITIAL_MATRIX){ 2568 rownz_max = 0; 2569 rptr = sbuf_j; 2570 cols = sbuf_j + rend-rstart + 1; 2571 vals = sbuf_a; 2572 rptr[0] = 0; 2573 for (i=0; i<rend-rstart; i++){ 2574 row = i + rstart; 2575 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2576 ncols = nzA + nzB; 2577 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2578 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2579 /* load the column indices for this row into cols */ 2580 lwrite = 0; 2581 for (l=0; l<nzB; l++) { 2582 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2583 vals[lwrite] = aworkB[l]; 2584 cols[lwrite++] = ctmp; 2585 } 2586 } 2587 for (l=0; l<nzA; l++){ 2588 vals[lwrite] = aworkA[l]; 2589 cols[lwrite++] = cstart + cworkA[l]; 2590 } 2591 for (l=0; l<nzB; l++) { 2592 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2593 vals[lwrite] = aworkB[l]; 2594 cols[lwrite++] = ctmp; 2595 } 2596 } 2597 vals += ncols; 2598 cols += ncols; 2599 rptr[i+1] = rptr[i] + ncols; 2600 if (rownz_max < ncols) rownz_max = ncols; 2601 } 2602 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2603 } else { /* only copy matrix values into sbuf_a */ 2604 rptr = sbuf_j; 2605 vals = sbuf_a; 2606 rptr[0] = 0; 2607 for (i=0; i<rend-rstart; i++){ 2608 row = i + 
rstart; 2609 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2610 ncols = nzA + nzB; 2611 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2612 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2613 lwrite = 0; 2614 for (l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2616 } 2617 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2618 for (l=0; l<nzB; l++) { 2619 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2620 } 2621 vals += ncols; 2622 rptr[i+1] = rptr[i] + ncols; 2623 } 2624 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2625 2626 /* send nzlocal to others, and recv other's nzlocal */ 2627 /*--------------------------------------------------*/ 2628 if (reuse == MAT_INITIAL_MATRIX){ 2629 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2630 s_waits2 = s_waits3 + nsends; 2631 s_waits1 = s_waits2 + nsends; 2632 r_waits1 = s_waits1 + nsends; 2633 r_waits2 = r_waits1 + nrecvs; 2634 r_waits3 = r_waits2 + nrecvs; 2635 } else { 2636 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2637 r_waits3 = s_waits3 + nsends; 2638 } 2639 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2641 if (reuse == MAT_INITIAL_MATRIX){ 2642 /* get new tags to keep the communication clean */ 2643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2644 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2645 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2646 2647 /* post receives of other's nzlocal */ 2648 for (i=0; i<nrecvs; i++){ 2649 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2650 } 2651 /* send nzlocal to others */ 2652 for (i=0; i<nsends; i++){ 2653 sbuf_nz[i] = nzlocal; 2654 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2655 } 2656 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2657 count = nrecvs; 2658 while (count) { 2659 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2660 recv_rank[imdex] = recv_status.MPI_SOURCE; 2661 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2662 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2663 2664 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2665 rbuf_nz[imdex] += i + 2; 2666 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2667 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2668 count--; 2669 } 2670 /* wait on sends of nzlocal */ 2671 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2672 /* send mat->i,j to others, and recv from other's */ 2673 /*------------------------------------------------*/ 2674 for (i=0; i<nsends; i++){ 2675 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2676 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2677 } 2678 /* wait on receives of mat->i,j */ 2679 /*------------------------------*/ 2680 count = nrecvs; 2681 while (count) { 2682 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2683 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2684 count--; 2685 } 2686 /* wait on sends of mat->i,j */ 2687 /*---------------------------*/ 2688 if (nsends) { 2689 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2690 } 2691 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2692 2693 /* post receives, send and receive mat->a */ 2694 /*----------------------------------------*/ 2695 for (imdex=0; imdex<nrecvs; imdex++) { 2696 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2697 } 2698 for (i=0; i<nsends; i++){ 2699 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2700 } 2701 count = nrecvs; 2702 while (count) { 2703 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2704 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2705 count--; 2706 } 2707 if (nsends) { 2708 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2709 } 2710 2711 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2712 2713 /* create redundant matrix */ 2714 /*-------------------------*/ 2715 if (reuse == MAT_INITIAL_MATRIX){ 2716 /* compute rownz_max for preallocation */ 2717 for (imdex=0; imdex<nrecvs; imdex++){ 2718 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2719 rptr = rbuf_j[imdex]; 2720 for (i=0; i<j; i++){ 2721 ncols = rptr[i+1] - rptr[i]; 2722 if (rownz_max < ncols) rownz_max = ncols; 2723 } 2724 } 2725 2726 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2727 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2728 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2729 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2730 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2731 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2732 } else { 2733 C = *matredundant; 2734 } 2735 2736 /* insert local matrix entries */ 2737 rptr = sbuf_j; 2738 cols = sbuf_j + rend-rstart + 1; 2739 vals = sbuf_a; 2740 for (i=0; i<rend-rstart; i++){ 2741 row = i + rstart; 2742 ncols = rptr[i+1] - rptr[i]; 2743 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2744 vals += ncols; 2745 cols += ncols; 2746 } 2747 /* insert received matrix entries */ 2748 for (imdex=0; imdex<nrecvs; imdex++){ 2749 rstart = rowrange[recv_rank[imdex]]; 2750 rend = rowrange[recv_rank[imdex]+1]; 2751 rptr = rbuf_j[imdex]; 2752 cols = rbuf_j[imdex] + rend-rstart + 1; 2753 vals = rbuf_a[imdex]; 2754 for (i=0; i<rend-rstart; i++){ 2755 row = i + rstart; 2756 ncols = rptr[i+1] - rptr[i]; 2757 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2758 vals += ncols; 2759 cols += ncols; 2760 } 2761 } 2762 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2763 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2764 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2765 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2766 if (reuse == MAT_INITIAL_MATRIX) { 2767 PetscContainer container; 2768 *matredundant = C; 2769 /* create a supporting struct and attach it to C for reuse */ 2770 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2771 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
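    /*
       Illustrative sketch (not part of this routine): the container composed onto C just below is
       what lets a later call with reuse == MAT_REUSE_MATRIX, and MatDestroy_MatRedundant() above,
       recover the Mat_Redundant data. The standard retrieval pattern, assuming the redundant
       matrix C, is

          PetscContainer cont;
          Mat_Redundant  *red;
          ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&cont);CHKERRQ(ierr);
          if (!cont) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
          ierr = PetscContainerGetPointer(cont,(void **)&red);CHKERRQ(ierr);
    */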
2772 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2773 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2774 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2775 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2776 2777 redund->nzlocal = nzlocal; 2778 redund->nsends = nsends; 2779 redund->nrecvs = nrecvs; 2780 redund->send_rank = send_rank; 2781 redund->recv_rank = recv_rank; 2782 redund->sbuf_nz = sbuf_nz; 2783 redund->rbuf_nz = rbuf_nz; 2784 redund->sbuf_j = sbuf_j; 2785 redund->sbuf_a = sbuf_a; 2786 redund->rbuf_j = rbuf_j; 2787 redund->rbuf_a = rbuf_a; 2788 2789 redund->Destroy = C->ops->destroy; 2790 C->ops->destroy = MatDestroy_MatRedundant; 2791 } 2792 PetscFunctionReturn(0); 2793 } 2794 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2797 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2798 { 2799 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2800 PetscErrorCode ierr; 2801 PetscInt i,*idxb = 0; 2802 PetscScalar *va,*vb; 2803 Vec vtmp; 2804 2805 PetscFunctionBegin; 2806 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2807 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2808 if (idx) { 2809 for (i=0; i<A->rmap->n; i++) { 2810 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2811 } 2812 } 2813 2814 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2815 if (idx) { 2816 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2817 } 2818 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2819 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2820 2821 for (i=0; i<A->rmap->n; i++){ 2822 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2823 va[i] = vb[i]; 2824 if (idx) idx[i] = a->garray[idxb[i]]; 2825 } 2826 } 2827 2828 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2829 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2830 ierr = PetscFree(idxb);CHKERRQ(ierr); 2831 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2837 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2838 { 2839 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2840 PetscErrorCode ierr; 2841 PetscInt i,*idxb = 0; 2842 PetscScalar *va,*vb; 2843 Vec vtmp; 2844 2845 PetscFunctionBegin; 2846 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2847 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2848 if (idx) { 2849 for (i=0; i<A->cmap->n; i++) { 2850 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2851 } 2852 } 2853 2854 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2855 if (idx) { 2856 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2857 } 2858 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2859 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2860 2861 for (i=0; i<A->rmap->n; i++){ 2862 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2863 va[i] = vb[i]; 2864 if (idx) idx[i] = a->garray[idxb[i]]; 2865 } 2866 } 2867 2868 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2869 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2870 ierr = PetscFree(idxb);CHKERRQ(ierr); 2871 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2872 PetscFunctionReturn(0); 2873 } 2874 2875 #undef __FUNCT__ 2876 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2877 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2878 { 2879 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2880 PetscInt n = A->rmap->n; 2881 PetscInt cstart = 
A->cmap->rstart; 2882 PetscInt *cmap = mat->garray; 2883 PetscInt *diagIdx, *offdiagIdx; 2884 Vec diagV, offdiagV; 2885 PetscScalar *a, *diagA, *offdiagA; 2886 PetscInt r; 2887 PetscErrorCode ierr; 2888 2889 PetscFunctionBegin; 2890 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2891 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2892 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2893 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2894 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2895 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2896 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2897 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2898 for(r = 0; r < n; ++r) { 2899 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2900 a[r] = diagA[r]; 2901 idx[r] = cstart + diagIdx[r]; 2902 } else { 2903 a[r] = offdiagA[r]; 2904 idx[r] = cmap[offdiagIdx[r]]; 2905 } 2906 } 2907 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2908 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2909 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2910 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2911 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2912 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2913 PetscFunctionReturn(0); 2914 } 2915 2916 #undef __FUNCT__ 2917 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2918 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2919 { 2920 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2921 PetscInt n = A->rmap->n; 2922 PetscInt cstart = A->cmap->rstart; 2923 PetscInt *cmap = mat->garray; 2924 PetscInt *diagIdx, *offdiagIdx; 2925 Vec diagV, offdiagV; 2926 PetscScalar *a, *diagA, *offdiagA; 2927 PetscInt r; 2928 PetscErrorCode ierr; 2929 2930 PetscFunctionBegin; 2931 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2932 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2933 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2934 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2935 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2936 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2937 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2938 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2939 for(r = 0; r < n; ++r) { 2940 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2941 a[r] = diagA[r]; 2942 idx[r] = cstart + diagIdx[r]; 2943 } else { 2944 a[r] = offdiagA[r]; 2945 idx[r] = cmap[offdiagIdx[r]]; 2946 } 2947 } 2948 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2949 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2950 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2951 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2952 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2953 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2954 PetscFunctionReturn(0); 2955 } 2956 2957 #undef __FUNCT__ 2958 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2959 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2960 { 2961 PetscErrorCode ierr; 2962 Mat *dummy; 2963 2964 PetscFunctionBegin; 2965 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2966 *newmat = *dummy; 2967 ierr = PetscFree(dummy);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2972 2973 #undef __FUNCT__ 
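/*
   Illustrative note (sketch only): MatInvertBlockDiagonal_MPIAIJ() below simply forwards to the
   local diagonal block a->A, since the (point-)block diagonal of an MPIAIJ matrix lies entirely
   in the on-process "A" part. A caller, for example a block Jacobi style preconditioner, would
   use it roughly as follows; the returned array is owned by the matrix and must not be freed:

      const PetscScalar *idiag;
      ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr);
*/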
2974 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2975 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2976 { 2977 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2978 PetscErrorCode ierr; 2979 2980 PetscFunctionBegin; 2981 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2982 PetscFunctionReturn(0); 2983 } 2984 2985 2986 /* -------------------------------------------------------------------*/ 2987 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2988 MatGetRow_MPIAIJ, 2989 MatRestoreRow_MPIAIJ, 2990 MatMult_MPIAIJ, 2991 /* 4*/ MatMultAdd_MPIAIJ, 2992 MatMultTranspose_MPIAIJ, 2993 MatMultTransposeAdd_MPIAIJ, 2994 #ifdef PETSC_HAVE_PBGL 2995 MatSolve_MPIAIJ, 2996 #else 2997 0, 2998 #endif 2999 0, 3000 0, 3001 /*10*/ 0, 3002 0, 3003 0, 3004 MatSOR_MPIAIJ, 3005 MatTranspose_MPIAIJ, 3006 /*15*/ MatGetInfo_MPIAIJ, 3007 MatEqual_MPIAIJ, 3008 MatGetDiagonal_MPIAIJ, 3009 MatDiagonalScale_MPIAIJ, 3010 MatNorm_MPIAIJ, 3011 /*20*/ MatAssemblyBegin_MPIAIJ, 3012 MatAssemblyEnd_MPIAIJ, 3013 MatSetOption_MPIAIJ, 3014 MatZeroEntries_MPIAIJ, 3015 /*24*/ MatZeroRows_MPIAIJ, 3016 0, 3017 #ifdef PETSC_HAVE_PBGL 3018 0, 3019 #else 3020 0, 3021 #endif 3022 0, 3023 0, 3024 /*29*/ MatSetUp_MPIAIJ, 3025 #ifdef PETSC_HAVE_PBGL 3026 0, 3027 #else 3028 0, 3029 #endif 3030 0, 3031 0, 3032 0, 3033 /*34*/ MatDuplicate_MPIAIJ, 3034 0, 3035 0, 3036 0, 3037 0, 3038 /*39*/ MatAXPY_MPIAIJ, 3039 MatGetSubMatrices_MPIAIJ, 3040 MatIncreaseOverlap_MPIAIJ, 3041 MatGetValues_MPIAIJ, 3042 MatCopy_MPIAIJ, 3043 /*44*/ MatGetRowMax_MPIAIJ, 3044 MatScale_MPIAIJ, 3045 0, 3046 0, 3047 MatZeroRowsColumns_MPIAIJ, 3048 /*49*/ 0, 3049 0, 3050 0, 3051 0, 3052 0, 3053 /*54*/ MatFDColoringCreate_MPIAIJ, 3054 0, 3055 MatSetUnfactored_MPIAIJ, 3056 0, /* MatPermute_MPIAIJ, impl currently broken */ 3057 0, 3058 /*59*/ MatGetSubMatrix_MPIAIJ, 3059 MatDestroy_MPIAIJ, 3060 MatView_MPIAIJ, 3061 0, 3062 0, 3063 /*64*/ 0, 3064 0, 3065 0, 3066 0, 3067 0, 3068 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3069 MatGetRowMinAbs_MPIAIJ, 3070 0, 3071 MatSetColoring_MPIAIJ, 3072 #if defined(PETSC_HAVE_ADIC) 3073 MatSetValuesAdic_MPIAIJ, 3074 #else 3075 0, 3076 #endif 3077 MatSetValuesAdifor_MPIAIJ, 3078 /*75*/ MatFDColoringApply_AIJ, 3079 0, 3080 0, 3081 0, 3082 0, 3083 /*80*/ 0, 3084 0, 3085 0, 3086 /*83*/ MatLoad_MPIAIJ, 3087 0, 3088 0, 3089 0, 3090 0, 3091 0, 3092 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3093 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3094 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3095 MatPtAP_Basic, 3096 MatPtAPSymbolic_MPIAIJ, 3097 /*94*/ MatPtAPNumeric_MPIAIJ, 3098 0, 3099 0, 3100 0, 3101 0, 3102 /*99*/ 0, 3103 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3104 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3105 MatConjugate_MPIAIJ, 3106 0, 3107 /*104*/MatSetValuesRow_MPIAIJ, 3108 MatRealPart_MPIAIJ, 3109 MatImaginaryPart_MPIAIJ, 3110 0, 3111 0, 3112 /*109*/0, 3113 MatGetRedundantMatrix_MPIAIJ, 3114 MatGetRowMin_MPIAIJ, 3115 0, 3116 0, 3117 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3118 0, 3119 0, 3120 0, 3121 0, 3122 /*119*/0, 3123 0, 3124 0, 3125 0, 3126 MatGetMultiProcBlock_MPIAIJ, 3127 /*124*/MatFindNonZeroRows_MPIAIJ, 3128 MatGetColumnNorms_MPIAIJ, 3129 MatInvertBlockDiagonal_MPIAIJ, 3130 0, 3131 MatGetSubMatricesParallel_MPIAIJ, 3132 /*129*/0, 3133 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3134 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3135 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3136 0, 3137 /*134*/0, 3138 0, 3139 0, 3140 0, 3141 0 3142 }; 3143 3144 /* ----------------------------------------------------------------------------------------*/ 3145 3146 
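/*
   The two wrappers below simply forward MatStoreValues()/MatRetrieveValues() to the
   diagonal (A) and off-diagonal (B) SeqAIJ blocks.  A hedged sketch of the caller-level
   pattern they support (J is an illustrative name; see the MatStoreValues() manual page
   for the authoritative description): once J has been assembled with its final nonzero
   pattern,

      MatSetOption(J,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
      MatStoreValues(J);                  <-- snapshot the current entries
      ...
      MatRetrieveValues(J);               <-- restore the snapshot
      insert only the terms that change, then MatAssemblyBegin()/MatAssemblyEnd()

   so that entries which never change (for example the linear part of a Jacobian) do not
   have to be recomputed on every assembly.
*/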
EXTERN_C_BEGIN 3147 #undef __FUNCT__ 3148 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3149 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3150 { 3151 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3152 PetscErrorCode ierr; 3153 3154 PetscFunctionBegin; 3155 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3156 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3157 PetscFunctionReturn(0); 3158 } 3159 EXTERN_C_END 3160 3161 EXTERN_C_BEGIN 3162 #undef __FUNCT__ 3163 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3164 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3165 { 3166 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3167 PetscErrorCode ierr; 3168 3169 PetscFunctionBegin; 3170 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3171 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3172 PetscFunctionReturn(0); 3173 } 3174 EXTERN_C_END 3175 3176 EXTERN_C_BEGIN 3177 #undef __FUNCT__ 3178 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3179 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3180 { 3181 Mat_MPIAIJ *b; 3182 PetscErrorCode ierr; 3183 PetscInt i; 3184 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3185 3186 PetscFunctionBegin; 3187 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3188 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3189 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3190 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3191 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3192 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3193 3194 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3195 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3196 if (d_nnz) { 3197 for (i=0; i<B->rmap->n; i++) { 3198 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3199 } 3200 } 3201 if (o_nnz) { 3202 for (i=0; i<B->rmap->n; i++) { 3203 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3204 } 3205 } 3206 b = (Mat_MPIAIJ*)B->data; 3207 3208 if (!B->preallocated) { 3209 /* Explicitly create 2 MATSEQAIJ matrices. */ 3210 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3211 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3212 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3213 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3214 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3215 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3216 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3217 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3218 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3219 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3220 } 3221 3222 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3223 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3224 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3225 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3226 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3227 B->preallocated = PETSC_TRUE; 3228 PetscFunctionReturn(0); 3229 } 3230 EXTERN_C_END 3231 3232 #undef __FUNCT__ 3233 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3234 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3235 { 3236 Mat mat; 3237 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3238 PetscErrorCode ierr; 3239 3240 PetscFunctionBegin; 3241 *newmat = 0; 3242 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3243 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3244 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3245 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3246 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3247 a = (Mat_MPIAIJ*)mat->data; 3248 3249 mat->factortype = matin->factortype; 3250 mat->rmap->bs = matin->rmap->bs; 3251 mat->cmap->bs = matin->cmap->bs; 3252 mat->assembled = PETSC_TRUE; 3253 mat->insertmode = NOT_SET_VALUES; 3254 mat->preallocated = PETSC_TRUE; 3255 3256 a->size = oldmat->size; 3257 a->rank = oldmat->rank; 3258 a->donotstash = oldmat->donotstash; 3259 a->roworiented = oldmat->roworiented; 3260 a->rowindices = 0; 3261 a->rowvalues = 0; 3262 a->getrowactive = PETSC_FALSE; 3263 3264 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3265 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3266 3267 if (oldmat->colmap) { 3268 #if defined (PETSC_USE_CTABLE) 3269 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3270 #else 3271 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3272 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3273 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3274 #endif 3275 } else a->colmap = 0; 3276 if (oldmat->garray) { 3277 PetscInt len; 3278 len = oldmat->B->cmap->n; 3279 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3280 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3281 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3282 } else a->garray = 0; 3283 3284 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3285 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3286 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3287 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3288 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3289 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3290 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3291 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3292 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3293 *newmat = mat; 3294 PetscFunctionReturn(0); 3295 } 3296 3297 3298 3299 #undef __FUNCT__ 3300 #define __FUNCT__ "MatLoad_MPIAIJ" 3301 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3302 { 3303 PetscScalar *vals,*svals; 3304 MPI_Comm comm = ((PetscObject)viewer)->comm; 3305 PetscErrorCode ierr; 3306 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3307 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3308 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols; 3309 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3310 PetscInt cend,cstart,n,*rowners,sizesset=1; 3311 int fd; 3312 3313 PetscFunctionBegin; 3314 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3315 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3316 if (!rank) { 3317 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3318 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3319 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3320 } 3321 3322 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3323 3324 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3325 M = header[1]; N = header[2]; 3326 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3327 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3328 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3329 3330 /* If global sizes are set, check if they are consistent with that given in the file */ 3331 if (sizesset) { 3332 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3333 } 3334 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3335 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3336 3337 /* determine ownership of all rows */ 3338 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3339 else m = newMat->rmap->n; /* Set by user */ 3340 3341 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3342 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3343 3344 /* First process needs enough room for process with most rows */ 3345 if (!rank) { 3346 mmax = rowners[1]; 3347 for (i=2; i<size; i++) { 3348 mmax = PetscMax(mmax,rowners[i]); 3349 } 3350 } else mmax = m; 3351 3352 rowners[0] = 0; 3353 for (i=2; i<=size; i++) { 3354 rowners[i] += rowners[i-1]; 3355 } 3356 rstart = rowners[rank]; 3357 rend = rowners[rank+1]; 3358 3359 /* distribute row lengths to all processors */ 3360 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3361 if (!rank) { 3362 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3363 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3364 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3365 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3366 for (j=0; j<m; j++) { 3367 procsnz[0] += ourlens[j]; 3368 } 3369 for (i=1; i<size; i++) { 3370 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3371 /* calculate the number of nonzeros on each processor */ 3372 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3373 procsnz[i] += rowlengths[j]; 3374 } 3375 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3376 } 3377 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3378 } else { 3379 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3380 } 3381 3382 if (!rank) { 3383 /* determine max buffer needed and allocate it */ 3384 maxnz = 0; 3385 for (i=0; i<size; i++) { 3386 maxnz = PetscMax(maxnz,procsnz[i]); 3387 } 3388 ierr = 
PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3389 3390 /* read in my part of the matrix column indices */ 3391 nz = procsnz[0]; 3392 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3393 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3394 3395 /* read in every one elses and ship off */ 3396 for (i=1; i<size; i++) { 3397 nz = procsnz[i]; 3398 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3399 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3400 } 3401 ierr = PetscFree(cols);CHKERRQ(ierr); 3402 } else { 3403 /* determine buffer space needed for message */ 3404 nz = 0; 3405 for (i=0; i<m; i++) { 3406 nz += ourlens[i]; 3407 } 3408 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3409 3410 /* receive message of column indices*/ 3411 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3412 } 3413 3414 /* determine column ownership if matrix is not square */ 3415 if (N != M) { 3416 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3417 else n = newMat->cmap->n; 3418 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3419 cstart = cend - n; 3420 } else { 3421 cstart = rstart; 3422 cend = rend; 3423 n = cend - cstart; 3424 } 3425 3426 /* loop over local rows, determining number of off diagonal entries */ 3427 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3428 jj = 0; 3429 for (i=0; i<m; i++) { 3430 for (j=0; j<ourlens[i]; j++) { 3431 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3432 jj++; 3433 } 3434 } 3435 3436 for (i=0; i<m; i++) { 3437 ourlens[i] -= offlens[i]; 3438 } 3439 if (!sizesset) { 3440 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3441 } 3442 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3443 3444 for (i=0; i<m; i++) { 3445 ourlens[i] += offlens[i]; 3446 } 3447 3448 if (!rank) { 3449 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3450 3451 /* read in my part of the matrix numerical values */ 3452 nz = procsnz[0]; 3453 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3454 3455 /* insert into matrix */ 3456 jj = rstart; 3457 smycols = mycols; 3458 svals = vals; 3459 for (i=0; i<m; i++) { 3460 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3461 smycols += ourlens[i]; 3462 svals += ourlens[i]; 3463 jj++; 3464 } 3465 3466 /* read in other processors and ship out */ 3467 for (i=1; i<size; i++) { 3468 nz = procsnz[i]; 3469 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3470 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3471 } 3472 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3473 } else { 3474 /* receive numeric values */ 3475 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3476 3477 /* receive message of values*/ 3478 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3479 3480 /* insert into matrix */ 3481 jj = rstart; 3482 smycols = mycols; 3483 svals = vals; 3484 for (i=0; i<m; i++) { 3485 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3486 smycols += ourlens[i]; 3487 svals += ourlens[i]; 3488 jj++; 3489 } 3490 } 3491 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3492 ierr = PetscFree(vals);CHKERRQ(ierr); 3493 ierr = PetscFree(mycols);CHKERRQ(ierr); 3494 ierr = PetscFree(rowners);CHKERRQ(ierr); 3495 3496 ierr = 
MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ copy is built
    on each process, and the final result is then obtained by concatenating those local
    matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol containing all of the requested column indices.
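
    A caller-level sketch of how the MAT_INITIAL_MATRIX/MAT_REUSE_MATRIX path implemented
    above and below is typically exercised (illustrative only; A, isrow, iscol and S are
    names assumed here, not part of this file):

       MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);
       ... change the numerical values of A, keeping its nonzero pattern ...
       MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);
       MatDestroy(&S);

    The first call composes the gathered column IS ("ISAllGather") and the local sequential
    submatrix ("SubMatrix") on S, which is what allows the second call to reuse them.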
3537 */ 3538 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3539 { 3540 PetscErrorCode ierr; 3541 PetscMPIInt rank,size; 3542 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3543 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3544 PetscBool allcolumns, colflag; 3545 Mat M,Mreuse; 3546 MatScalar *vwork,*aa; 3547 MPI_Comm comm = ((PetscObject)mat)->comm; 3548 Mat_SeqAIJ *aij; 3549 3550 3551 PetscFunctionBegin; 3552 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3553 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3554 3555 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3556 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3557 if (colflag && ncol == mat->cmap->N){ 3558 allcolumns = PETSC_TRUE; 3559 } else { 3560 allcolumns = PETSC_FALSE; 3561 } 3562 if (call == MAT_REUSE_MATRIX) { 3563 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3564 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3565 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3566 } else { 3567 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3568 } 3569 3570 /* 3571 m - number of local rows 3572 n - number of columns (same on all processors) 3573 rstart - first row in new global matrix generated 3574 */ 3575 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3576 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3577 if (call == MAT_INITIAL_MATRIX) { 3578 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3579 ii = aij->i; 3580 jj = aij->j; 3581 3582 /* 3583 Determine the number of non-zeros in the diagonal and off-diagonal 3584 portions of the matrix in order to do correct preallocation 3585 */ 3586 3587 /* first get start and end of "diagonal" columns */ 3588 if (csize == PETSC_DECIDE) { 3589 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3590 if (mglobal == n) { /* square matrix */ 3591 nlocal = m; 3592 } else { 3593 nlocal = n/size + ((n % size) > rank); 3594 } 3595 } else { 3596 nlocal = csize; 3597 } 3598 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3599 rstart = rend - nlocal; 3600 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3601 3602 /* next, compute all the lengths */ 3603 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3604 olens = dlens + m; 3605 for (i=0; i<m; i++) { 3606 jend = ii[i+1] - ii[i]; 3607 olen = 0; 3608 dlen = 0; 3609 for (j=0; j<jend; j++) { 3610 if (*jj < rstart || *jj >= rend) olen++; 3611 else dlen++; 3612 jj++; 3613 } 3614 olens[i] = olen; 3615 dlens[i] = dlen; 3616 } 3617 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3618 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3619 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3620 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3621 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3622 ierr = PetscFree(dlens);CHKERRQ(ierr); 3623 } else { 3624 PetscInt ml,nl; 3625 3626 M = *newmat; 3627 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3628 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3629 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3630 /* 3631 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3720 } 3721 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3722 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3723 3724 if (!v) { 3725 ierr = PetscFree(values);CHKERRQ(ierr); 3726 } 3727 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3728 PetscFunctionReturn(0); 3729 } 3730 EXTERN_C_END 3731 3732 #undef __FUNCT__ 3733 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3734 /*@ 3735 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3736 (the default parallel PETSc format). 3737 3738 Collective on MPI_Comm 3739 3740 Input Parameters: 3741 + B - the matrix 3742 . i - the indices into j for the start of each local row (starts with zero) 3743 . j - the column indices for each local row (starts with zero) 3744 - v - optional values in the matrix 3745 3746 Level: developer 3747 3748 Notes: 3749 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3750 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3751 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3752 3753 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3754 3755 The format which is used for the sparse matrix input, is equivalent to a 3756 row-major ordering.. i.e for the following matrix, the input data expected is 3757 as shown: 3758 3759 1 0 0 3760 2 0 3 P0 3761 ------- 3762 4 5 6 P1 3763 3764 Process0 [P0]: rows_owned=[0,1] 3765 i = {0,1,3} [size = nrow+1 = 2+1] 3766 j = {0,0,2} [size = nz = 6] 3767 v = {1,2,3} [size = nz = 6] 3768 3769 Process1 [P1]: rows_owned=[2] 3770 i = {0,3} [size = nrow+1 = 1+1] 3771 j = {0,1,2} [size = nz = 6] 3772 v = {4,5,6} [size = nz = 6] 3773 3774 .keywords: matrix, aij, compressed row, sparse, parallel 3775 3776 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3777 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3778 @*/ 3779 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3780 { 3781 PetscErrorCode ierr; 3782 3783 PetscFunctionBegin; 3784 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3785 PetscFunctionReturn(0); 3786 } 3787 3788 #undef __FUNCT__ 3789 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3790 /*@C 3791 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3792 (the default parallel PETSc format). For good matrix assembly performance 3793 the user should preallocate the matrix storage by setting the parameters 3794 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3795 performance can be increased by more than a factor of 50. 3796 3797 Collective on MPI_Comm 3798 3799 Input Parameters: 3800 + A - the matrix 3801 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3802 (same value is used for all local rows) 3803 . d_nnz - array containing the number of nonzeros in the various rows of the 3804 DIAGONAL portion of the local submatrix (possibly different for each row) 3805 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3806 The size of this array is equal to the number of local rows, i.e 'm'. 
3807 For matrices that will be factored, you must leave room for (and set) 3808 the diagonal entry even if it is zero. 3809 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3810 submatrix (same value is used for all local rows). 3811 - o_nnz - array containing the number of nonzeros in the various rows of the 3812 OFF-DIAGONAL portion of the local submatrix (possibly different for 3813 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3814 structure. The size of this array is equal to the number 3815 of local rows, i.e 'm'. 3816 3817 If the *_nnz parameter is given then the *_nz parameter is ignored 3818 3819 The AIJ format (also called the Yale sparse matrix format or 3820 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3821 storage. The stored row and column indices begin with zero. 3822 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3823 3824 The parallel matrix is partitioned such that the first m0 rows belong to 3825 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3826 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3827 3828 The DIAGONAL portion of the local submatrix of a processor can be defined 3829 as the submatrix which is obtained by extraction the part corresponding to 3830 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3831 first row that belongs to the processor, r2 is the last row belonging to 3832 the this processor, and c1-c2 is range of indices of the local part of a 3833 vector suitable for applying the matrix to. This is an mxn matrix. In the 3834 common case of a square matrix, the row and column ranges are the same and 3835 the DIAGONAL part is also square. The remaining portion of the local 3836 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3837 3838 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3839 3840 You can call MatGetInfo() to get information on how effective the preallocation was; 3841 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3842 You can also run with the option -info and look for messages with the string 3843 malloc in them to see if additional memory allocation was needed. 3844 3845 Example usage: 3846 3847 Consider the following 8x8 matrix with 34 non-zero values, that is 3848 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3849 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3850 as follows: 3851 3852 .vb 3853 1 2 0 | 0 3 0 | 0 4 3854 Proc0 0 5 6 | 7 0 0 | 8 0 3855 9 0 10 | 11 0 0 | 12 0 3856 ------------------------------------- 3857 13 0 14 | 15 16 17 | 0 0 3858 Proc1 0 18 0 | 19 20 21 | 0 0 3859 0 0 0 | 22 23 0 | 24 0 3860 ------------------------------------- 3861 Proc2 25 26 27 | 0 0 28 | 29 0 3862 30 0 0 | 31 32 33 | 0 34 3863 .ve 3864 3865 This can be represented as a collection of submatrices as: 3866 3867 .vb 3868 A B C 3869 D E F 3870 G H I 3871 .ve 3872 3873 Where the submatrices A,B,C are owned by proc0, D,E,F are 3874 owned by proc1, G,H,I are owned by proc2. 3875 3876 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3877 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3878 The 'M','N' parameters are 8,8, and have the same values on all procs. 3879 3880 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3881 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3882 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3883 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3884 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3885 matrix, ans [DF] as another SeqAIJ matrix. 3886 3887 When d_nz, o_nz parameters are specified, d_nz storage elements are 3888 allocated for every row of the local diagonal submatrix, and o_nz 3889 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3890 One way to choose d_nz and o_nz is to use the max nonzerors per local 3891 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3892 In this case, the values of d_nz,o_nz are: 3893 .vb 3894 proc0 : dnz = 2, o_nz = 2 3895 proc1 : dnz = 3, o_nz = 2 3896 proc2 : dnz = 1, o_nz = 4 3897 .ve 3898 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3899 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3900 for proc3. i.e we are using 12+15+10=37 storage locations to store 3901 34 values. 3902 3903 When d_nnz, o_nnz parameters are specified, the storage is specified 3904 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3905 In the above case the values for d_nnz,o_nnz are: 3906 .vb 3907 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3908 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3909 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3910 .ve 3911 Here the space allocated is sum of all the above values i.e 34, and 3912 hence pre-allocation is perfect. 3913 3914 Level: intermediate 3915 3916 .keywords: matrix, aij, compressed row, sparse, parallel 3917 3918 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3919 MPIAIJ, MatGetInfo() 3920 @*/ 3921 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3922 { 3923 PetscErrorCode ierr; 3924 3925 PetscFunctionBegin; 3926 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3927 PetscValidType(B,1); 3928 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3929 PetscFunctionReturn(0); 3930 } 3931 3932 #undef __FUNCT__ 3933 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3934 /*@ 3935 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3936 CSR format the local rows. 3937 3938 Collective on MPI_Comm 3939 3940 Input Parameters: 3941 + comm - MPI communicator 3942 . m - number of local rows (Cannot be PETSC_DECIDE) 3943 . n - This value should be the same as the local size used in creating the 3944 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3945 calculated if N is given) For square matrices n is almost always m. 3946 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3947 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3948 . i - row indices 3949 . j - column indices 3950 - a - matrix values 3951 3952 Output Parameter: 3953 . mat - the matrix 3954 3955 Level: intermediate 3956 3957 Notes: 3958 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3959 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3960 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
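
       As an illustration only (the array names here are not part of this routine's
       interface), process 0 in the layout example further below could create its two
       local rows with
.vb
       PetscInt    i[] = {0,1,3},j[] = {0,0,2};
       PetscScalar a[] = {1.0,2.0,3.0};
       Mat         A;

       MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);
.ve
       while process 1 makes the matching collective call for its single row
       (i[] = {0,3}, j[] = {0,1,2}, a[] = {4.0,5.0,6.0}).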
3961 3962 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3963 3964 The format which is used for the sparse matrix input, is equivalent to a 3965 row-major ordering.. i.e for the following matrix, the input data expected is 3966 as shown: 3967 3968 1 0 0 3969 2 0 3 P0 3970 ------- 3971 4 5 6 P1 3972 3973 Process0 [P0]: rows_owned=[0,1] 3974 i = {0,1,3} [size = nrow+1 = 2+1] 3975 j = {0,0,2} [size = nz = 6] 3976 v = {1,2,3} [size = nz = 6] 3977 3978 Process1 [P1]: rows_owned=[2] 3979 i = {0,3} [size = nrow+1 = 1+1] 3980 j = {0,1,2} [size = nz = 6] 3981 v = {4,5,6} [size = nz = 6] 3982 3983 .keywords: matrix, aij, compressed row, sparse, parallel 3984 3985 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3986 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3987 @*/ 3988 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3989 { 3990 PetscErrorCode ierr; 3991 3992 PetscFunctionBegin; 3993 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3994 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3995 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3996 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3997 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 3998 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3999 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4000 PetscFunctionReturn(0); 4001 } 4002 4003 #undef __FUNCT__ 4004 #define __FUNCT__ "MatCreateAIJ" 4005 /*@C 4006 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4007 (the default parallel PETSc format). For good matrix assembly performance 4008 the user should preallocate the matrix storage by setting the parameters 4009 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4010 performance can be increased by more than a factor of 50. 4011 4012 Collective on MPI_Comm 4013 4014 Input Parameters: 4015 + comm - MPI communicator 4016 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4017 This value should be the same as the local size used in creating the 4018 y vector for the matrix-vector product y = Ax. 4019 . n - This value should be the same as the local size used in creating the 4020 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4021 calculated if N is given) For square matrices n is almost always m. 4022 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4023 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4024 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4025 (same value is used for all local rows) 4026 . d_nnz - array containing the number of nonzeros in the various rows of the 4027 DIAGONAL portion of the local submatrix (possibly different for each row) 4028 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4029 The size of this array is equal to the number of local rows, i.e 'm'. 4030 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4031 submatrix (same value is used for all local rows). 
4032 - o_nnz - array containing the number of nonzeros in the various rows of the 4033 OFF-DIAGONAL portion of the local submatrix (possibly different for 4034 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4035 structure. The size of this array is equal to the number 4036 of local rows, i.e 'm'. 4037 4038 Output Parameter: 4039 . A - the matrix 4040 4041 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4042 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4043 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4044 4045 Notes: 4046 If the *_nnz parameter is given then the *_nz parameter is ignored 4047 4048 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4049 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4050 storage requirements for this matrix. 4051 4052 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4053 processor than it must be used on all processors that share the object for 4054 that argument. 4055 4056 The user MUST specify either the local or global matrix dimensions 4057 (possibly both). 4058 4059 The parallel matrix is partitioned across processors such that the 4060 first m0 rows belong to process 0, the next m1 rows belong to 4061 process 1, the next m2 rows belong to process 2 etc.. where 4062 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4063 values corresponding to [m x N] submatrix. 4064 4065 The columns are logically partitioned with the n0 columns belonging 4066 to 0th partition, the next n1 columns belonging to the next 4067 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4068 4069 The DIAGONAL portion of the local submatrix on any given processor 4070 is the submatrix corresponding to the rows and columns m,n 4071 corresponding to the given processor. i.e diagonal matrix on 4072 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4073 etc. The remaining portion of the local submatrix [m x (N-n)] 4074 constitute the OFF-DIAGONAL portion. The example below better 4075 illustrates this concept. 4076 4077 For a square global matrix we define each processor's diagonal portion 4078 to be its local rows and the corresponding columns (a square submatrix); 4079 each processor's off-diagonal portion encompasses the remainder of the 4080 local matrix (a rectangular submatrix). 4081 4082 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4083 4084 When calling this routine with a single process communicator, a matrix of 4085 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4086 type of communicator, use the construction mechanism: 4087 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4088 4089 By default, this format uses inodes (identical nodes) when possible. 4090 We search for consecutive rows with the same nonzero structure, thereby 4091 reusing matrix information to achieve increased efficiency. 4092 4093 Options Database Keys: 4094 + -mat_no_inode - Do not use inodes 4095 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4096 - -mat_aij_oneindex - Internally use indexing starting at 1 4097 rather than 0. Note that when calling MatSetValues(), 4098 the user still MUST index entries starting at 0! 4099 4100 4101 Example usage: 4102 4103 Consider the following 8x8 matrix with 34 non-zero values, that is 4104 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4105 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4106 as follows: 4107 4108 .vb 4109 1 2 0 | 0 3 0 | 0 4 4110 Proc0 0 5 6 | 7 0 0 | 8 0 4111 9 0 10 | 11 0 0 | 12 0 4112 ------------------------------------- 4113 13 0 14 | 15 16 17 | 0 0 4114 Proc1 0 18 0 | 19 20 21 | 0 0 4115 0 0 0 | 22 23 0 | 24 0 4116 ------------------------------------- 4117 Proc2 25 26 27 | 0 0 28 | 29 0 4118 30 0 0 | 31 32 33 | 0 34 4119 .ve 4120 4121 This can be represented as a collection of submatrices as: 4122 4123 .vb 4124 A B C 4125 D E F 4126 G H I 4127 .ve 4128 4129 Where the submatrices A,B,C are owned by proc0, D,E,F are 4130 owned by proc1, G,H,I are owned by proc2. 4131 4132 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4133 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4134 The 'M','N' parameters are 8,8, and have the same values on all procs. 4135 4136 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4137 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4138 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4139 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4140 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4141 matrix, ans [DF] as another SeqAIJ matrix. 4142 4143 When d_nz, o_nz parameters are specified, d_nz storage elements are 4144 allocated for every row of the local diagonal submatrix, and o_nz 4145 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4146 One way to choose d_nz and o_nz is to use the max nonzerors per local 4147 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4148 In this case, the values of d_nz,o_nz are: 4149 .vb 4150 proc0 : dnz = 2, o_nz = 2 4151 proc1 : dnz = 3, o_nz = 2 4152 proc2 : dnz = 1, o_nz = 4 4153 .ve 4154 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4155 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4156 for proc3. i.e we are using 12+15+10=37 storage locations to store 4157 34 values. 4158 4159 When d_nnz, o_nnz parameters are specified, the storage is specified 4160 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4161 In the above case the values for d_nnz,o_nnz are: 4162 .vb 4163 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4164 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4165 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4166 .ve 4167 Here the space allocated is sum of all the above values i.e 34, and 4168 hence pre-allocation is perfect. 
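
   As a sketch only (the variable names are illustrative, not part of the interface),
   process 0 in the example above could then create its share of the matrix with
.vb
   PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
   Mat      A;

   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   with processes 1 and 2 making the corresponding collective calls using their own
   m, n, d_nnz and o_nnz values listed above.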
4169 4170 Level: intermediate 4171 4172 .keywords: matrix, aij, compressed row, sparse, parallel 4173 4174 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4175 MPIAIJ, MatCreateMPIAIJWithArrays() 4176 @*/ 4177 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4178 { 4179 PetscErrorCode ierr; 4180 PetscMPIInt size; 4181 4182 PetscFunctionBegin; 4183 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4184 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4185 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4186 if (size > 1) { 4187 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4188 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4189 } else { 4190 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4191 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4192 } 4193 PetscFunctionReturn(0); 4194 } 4195 4196 #undef __FUNCT__ 4197 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4198 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4199 { 4200 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4201 4202 PetscFunctionBegin; 4203 *Ad = a->A; 4204 *Ao = a->B; 4205 *colmap = a->garray; 4206 PetscFunctionReturn(0); 4207 } 4208 4209 #undef __FUNCT__ 4210 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4211 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4212 { 4213 PetscErrorCode ierr; 4214 PetscInt i; 4215 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4216 4217 PetscFunctionBegin; 4218 if (coloring->ctype == IS_COLORING_GLOBAL) { 4219 ISColoringValue *allcolors,*colors; 4220 ISColoring ocoloring; 4221 4222 /* set coloring for diagonal portion */ 4223 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4224 4225 /* set coloring for off-diagonal portion */ 4226 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4227 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4228 for (i=0; i<a->B->cmap->n; i++) { 4229 colors[i] = allcolors[a->garray[i]]; 4230 } 4231 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4232 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4233 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4234 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4235 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4236 ISColoringValue *colors; 4237 PetscInt *larray; 4238 ISColoring ocoloring; 4239 4240 /* set coloring for diagonal portion */ 4241 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4242 for (i=0; i<a->A->cmap->n; i++) { 4243 larray[i] = i + A->cmap->rstart; 4244 } 4245 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4246 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4247 for (i=0; i<a->A->cmap->n; i++) { 4248 colors[i] = coloring->colors[larray[i]]; 4249 } 4250 ierr = PetscFree(larray);CHKERRQ(ierr); 4251 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4252 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4253 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4254 4255 /* set coloring for off-diagonal portion */ 4256 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4257 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4258 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4259 for (i=0; i<a->B->cmap->n; i++) { 4260 colors[i] = coloring->colors[larray[i]]; 4261 } 4262 ierr = PetscFree(larray);CHKERRQ(ierr); 4263 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4264 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4265 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4266 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4267 4268 PetscFunctionReturn(0); 4269 } 4270 4271 #if defined(PETSC_HAVE_ADIC) 4272 #undef __FUNCT__ 4273 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4274 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4275 { 4276 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4277 PetscErrorCode ierr; 4278 4279 PetscFunctionBegin; 4280 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4281 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4282 PetscFunctionReturn(0); 4283 } 4284 #endif 4285 4286 #undef __FUNCT__ 4287 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4288 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4289 { 4290 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4291 PetscErrorCode ierr; 4292 4293 PetscFunctionBegin; 4294 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4295 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4296 PetscFunctionReturn(0); 4297 } 4298 4299 #undef __FUNCT__ 4300 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4301 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4302 { 4303 PetscErrorCode ierr; 4304 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4305 PetscInt *indx; 4306 4307 PetscFunctionBegin; 4308 /* This routine will ONLY return MPIAIJ type matrix */ 4309 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4310 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4311 if (n == PETSC_DECIDE){ 4312 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4313 } 4314 /* Check sum(n) = N */ 4315 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4316 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4317 4318 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4319 rstart -= m; 4320 4321 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4322 for (i=0;i<m;i++) { 4323 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4324 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4325 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4326 } 4327 4328 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4329 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4330 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4331 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4332 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4333 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4334 PetscFunctionReturn(0); 4335 } 4336 4337 #undef __FUNCT__ 4338 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4339 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4340 { 4341 PetscErrorCode ierr; 4342 PetscInt 
m,N,i,rstart,nnz,Ii; 4343 PetscInt *indx; 4344 PetscScalar *values; 4345 4346 PetscFunctionBegin; 4347 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4348 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4349 for (i=0;i<m;i++) { 4350 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4351 Ii = i + rstart; 4352 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4353 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4354 } 4355 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4356 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4357 PetscFunctionReturn(0); 4358 } 4359 4360 #undef __FUNCT__ 4361 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4362 /*@ 4363 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4364 matrices from each processor 4365 4366 Collective on MPI_Comm 4367 4368 Input Parameters: 4369 + comm - the communicators the parallel matrix will live on 4370 . inmat - the input sequential matrices 4371 . n - number of local columns (or PETSC_DECIDE) 4372 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4373 4374 Output Parameter: 4375 . outmat - the parallel matrix generated 4376 4377 Level: advanced 4378 4379 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4380 4381 @*/ 4382 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4383 { 4384 PetscErrorCode ierr; 4385 4386 PetscFunctionBegin; 4387 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4388 if (scall == MAT_INITIAL_MATRIX){ 4389 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4390 } 4391 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4392 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4393 PetscFunctionReturn(0); 4394 } 4395 4396 #undef __FUNCT__ 4397 #define __FUNCT__ "MatFileSplit" 4398 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4399 { 4400 PetscErrorCode ierr; 4401 PetscMPIInt rank; 4402 PetscInt m,N,i,rstart,nnz; 4403 size_t len; 4404 const PetscInt *indx; 4405 PetscViewer out; 4406 char *name; 4407 Mat B; 4408 const PetscScalar *values; 4409 4410 PetscFunctionBegin; 4411 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4412 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4413 /* Should this be the type of the diagonal block of A? 
*/ 4414 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4415 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4416 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4417 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4418 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4419 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4420 for (i=0;i<m;i++) { 4421 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4422 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4423 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4424 } 4425 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4426 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4427 4428 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4429 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4430 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4431 sprintf(name,"%s.%d",outfile,rank); 4432 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4433 ierr = PetscFree(name); 4434 ierr = MatView(B,out);CHKERRQ(ierr); 4435 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4436 ierr = MatDestroy(&B);CHKERRQ(ierr); 4437 PetscFunctionReturn(0); 4438 } 4439 4440 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4441 #undef __FUNCT__ 4442 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4443 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4444 { 4445 PetscErrorCode ierr; 4446 Mat_Merge_SeqsToMPI *merge; 4447 PetscContainer container; 4448 4449 PetscFunctionBegin; 4450 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4451 if (container) { 4452 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4453 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4454 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4455 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4456 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4457 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4458 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4459 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4460 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4461 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4462 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4463 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4464 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4465 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4466 ierr = PetscFree(merge);CHKERRQ(ierr); 4467 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4468 } 4469 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4470 PetscFunctionReturn(0); 4471 } 4472 4473 #include <../src/mat/utils/freespace.h> 4474 #include <petscbt.h> 4475 4476 #undef __FUNCT__ 4477 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4478 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4479 { 4480 PetscErrorCode ierr; 4481 MPI_Comm comm=((PetscObject)mpimat)->comm; 4482 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4483 PetscMPIInt size,rank,taga,*len_s; 4484 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4485 PetscInt proc,m; 4486 PetscInt **buf_ri,**buf_rj; 4487 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4488 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4489 MPI_Request *s_waits,*r_waits; 4490 MPI_Status *status; 4491 MatScalar *aa=a->a; 4492 MatScalar **abuf_r,*ba_i; 4493 Mat_Merge_SeqsToMPI *merge; 4494 PetscContainer container; 4495 4496 PetscFunctionBegin; 4497 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4498 4499 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4500 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4501 4502 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4503 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4504 4505 bi = merge->bi; 4506 bj = merge->bj; 4507 buf_ri = merge->buf_ri; 4508 buf_rj = merge->buf_rj; 4509 4510 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4511 owners = merge->rowmap->range; 4512 len_s = merge->len_s; 4513 4514 /* send and recv matrix values */ 4515 /*-----------------------------*/ 4516 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4517 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4518 4519 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4520 for (proc=0,k=0; proc<size; proc++){ 4521 if (!len_s[proc]) continue; 4522 i = owners[proc]; 4523 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4524 k++; 4525 } 4526 4527 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4528 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4529 ierr = PetscFree(status);CHKERRQ(ierr); 4530 4531 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4532 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4533 4534 /* insert mat values of mpimat */ 4535 /*----------------------------*/ 4536 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4537 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4538 4539 for (k=0; k<merge->nrecv; k++){ 4540 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4541 nrows = *(buf_ri_k[k]); 4542 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4543 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4544 } 4545 4546 /* set values of ba */ 4547 m = merge->rowmap->n; 4548 for (i=0; i<m; i++) { 4549 arow = owners[rank] + i; 4550 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4551 bnzi = bi[i+1] - bi[i]; 4552 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4553 4554 /* add local non-zero vals of this proc's seqmat into ba */ 4555 anzi = ai[arow+1] - ai[arow]; 4556 aj = a->j + ai[arow]; 4557 aa = a->a + ai[arow]; 4558 nextaj = 0; 4559 for (j=0; nextaj<anzi; j++){ 4560 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4561 ba_i[j] += aa[nextaj++]; 4562 } 4563 } 4564 4565 /* add received vals into ba */ 4566 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4567 /* i-th row */ 4568 if (i == *nextrow[k]) { 4569 anzi = *(nextai[k]+1) - *nextai[k]; 4570 aj = buf_rj[k] + *(nextai[k]); 4571 aa = abuf_r[k] + *(nextai[k]); 4572 nextaj = 0; 4573 for (j=0; nextaj<anzi; j++){ 4574 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4575 ba_i[j] += aa[nextaj++]; 4576 } 4577 } 4578 nextrow[k]++; nextai[k]++; 4579 } 4580 } 4581 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4582 } 4583 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4584 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4585 4586 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4587 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4588 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4589 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4590 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4591 PetscFunctionReturn(0); 4592 } 4593 4594 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4595 4596 #undef __FUNCT__ 4597 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4598 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4599 { 4600 PetscErrorCode ierr; 4601 Mat B_mpi; 4602 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4603 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4604 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4605 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4606 PetscInt len,proc,*dnz,*onz,bs,cbs; 4607 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4608 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4609 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4610 MPI_Status *status; 4611 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4612 PetscBT lnkbt; 4613 Mat_Merge_SeqsToMPI *merge; 4614 PetscContainer container; 4615 4616 PetscFunctionBegin; 4617 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4618 4619 /* make sure it is a PETSc comm */ 4620 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4621 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4622 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4623 4624 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4625 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4626 4627 /* determine row ownership */ 4628 /*---------------------------------------------------------*/ 4629 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4630 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4631 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4632 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4633 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4634 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4635 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4636 4637 m = merge->rowmap->n; 4638 M = merge->rowmap->N; 4639 owners = merge->rowmap->range; 4640 4641 /* determine the number of messages to send, their lengths */ 4642 /*---------------------------------------------------------*/ 4643 len_s = merge->len_s; 4644 4645 len = 0; /* length of buf_si[] */ 4646 merge->nsend = 0; 4647 for (proc=0; proc<size; proc++){ 4648 len_si[proc] = 0; 4649 if (proc == rank){ 4650 len_s[proc] = 0; 4651 } else { 4652 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4653 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4654 } 4655 if (len_s[proc]) { 4656 merge->nsend++; 4657 nrows = 0; 4658 for (i=owners[proc]; i<owners[proc+1]; i++){ 4659 if (ai[i+1] > ai[i]) nrows++; 4660 } 4661 len_si[proc] = 2*(nrows+1); 4662 len += len_si[proc]; 4663 } 4664 } 4665 4666 /* determine the number and length of messages to receive for ij-structure */ 4667 /*-------------------------------------------------------------------------*/ 4668 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4669 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4670 4671 /* post the Irecv of j-structure */ 4672 /*-------------------------------*/ 4673 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4674 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4675 4676 /* post the Isend of j-structure */ 4677 /*--------------------------------*/ 4678 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4679 4680 for (proc=0, k=0; proc<size; proc++){ 4681 if (!len_s[proc]) continue; 4682 i = owners[proc]; 4683 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4684 k++; 4685 } 4686 4687 /* receives and sends of j-structure are complete */ 4688 /*------------------------------------------------*/ 4689 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4690 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4691 4692 /* send and recv i-structure */ 4693 /*---------------------------*/ 4694 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4695 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4696 4697 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4698 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4699 for (proc=0,k=0; proc<size; proc++){ 4700 if (!len_s[proc]) continue; 4701 /* form outgoing message for i-structure: 4702 buf_si[0]: nrows to be sent 4703 [1:nrows]: row index (global) 4704 [nrows+1:2*nrows+1]: i-structure index 4705 */ 4706 /*-------------------------------------------*/ 4707 nrows = len_si[proc]/2 - 1; 4708 buf_si_i = buf_si + nrows+1; 4709 buf_si[0] = nrows; 4710 buf_si_i[0] = 0; 4711 nrows = 0; 4712 for (i=owners[proc]; i<owners[proc+1]; i++){ 4713 anzi = ai[i+1] - ai[i]; 4714 if (anzi) { 4715 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4716 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4717 nrows++; 4718 } 4719 } 4720 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4721 k++; 4722 buf_si += len_si[proc]; 4723 } 4724 4725 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4726 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4727 4728 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4729 for (i=0; i<merge->nrecv; i++){ 4730 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4731 } 4732 4733 ierr = PetscFree(len_si);CHKERRQ(ierr); 4734 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4735 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4736 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4737 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4738 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4739 ierr = PetscFree(status);CHKERRQ(ierr); 4740 4741 /* compute a local seq matrix in each processor */ 4742 /*----------------------------------------------*/ 4743 /* allocate bi array and free space for accumulating nonzero column info */ 4744 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4745 bi[0] = 0; 4746 4747 /* create and initialize a linked list */ 4748 nlnk = N+1; 4749 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4750 4751 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4752 len = 0; 4753 len = ai[owners[rank+1]] - ai[owners[rank]]; 4754 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4755 current_space = free_space; 4756 4757 /* determine 
symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr =
PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
        matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each processor
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1){
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX){
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
        mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
        with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated 4916 4917 Level: developer 4918 4919 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4920 4921 @*/ 4922 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4923 { 4924 PetscErrorCode ierr; 4925 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4926 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4927 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4928 MatScalar *aa=a->a,*ba=b->a,*cam; 4929 PetscScalar *ca; 4930 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4931 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4932 PetscBool match; 4933 4934 PetscFunctionBegin; 4935 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4936 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4937 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4938 if (scall == MAT_INITIAL_MATRIX){ 4939 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4940 ci[0] = 0; 4941 for (i=0; i<am; i++){ 4942 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4943 } 4944 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4945 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4946 k = 0; 4947 for (i=0; i<am; i++) { 4948 ncols_o = bi[i+1] - bi[i]; 4949 ncols_d = ai[i+1] - ai[i]; 4950 /* off-diagonal portion of A */ 4951 for (jo=0; jo<ncols_o; jo++) { 4952 col = cmap[*bj]; 4953 if (col >= cstart) break; 4954 cj[k] = col; bj++; 4955 ca[k++] = *ba++; 4956 } 4957 /* diagonal portion of A */ 4958 for (j=0; j<ncols_d; j++) { 4959 cj[k] = cstart + *aj++; 4960 ca[k++] = *aa++; 4961 } 4962 /* off-diagonal portion of A */ 4963 for (j=jo; j<ncols_o; j++) { 4964 cj[k] = cmap[*bj++]; 4965 ca[k++] = *ba++; 4966 } 4967 } 4968 /* put together the new matrix */ 4969 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4970 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4971 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4972 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4973 mat->free_a = PETSC_TRUE; 4974 mat->free_ij = PETSC_TRUE; 4975 mat->nonew = 0; 4976 } else if (scall == MAT_REUSE_MATRIX){ 4977 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4978 ci = mat->i; cj = mat->j; cam = mat->a; 4979 for (i=0; i<am; i++) { 4980 /* off-diagonal portion of A */ 4981 ncols_o = bi[i+1] - bi[i]; 4982 for (jo=0; jo<ncols_o; jo++) { 4983 col = cmap[*bj]; 4984 if (col >= cstart) break; 4985 *cam++ = *ba++; bj++; 4986 } 4987 /* diagonal portion of A */ 4988 ncols_d = ai[i+1] - ai[i]; 4989 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4990 /* off-diagonal portion of A */ 4991 for (j=jo; j<ncols_o; j++) { 4992 *cam++ = *ba++; bj++; 4993 } 4994 } 4995 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4996 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4997 PetscFunctionReturn(0); 4998 } 4999 5000 #undef __FUNCT__ 5001 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5002 /*@C 5003 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5004 5005 Not Collective 5006 5007 Input Parameters: 5008 + A - the matrix 5009 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5010 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5011 5012 Output Parameter: 5013 . 
A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
      MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5084 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5085 5086 Output Parameter: 5087 + rowb, colb - index sets of rows and columns of B to extract 5088 - B_seq - the sequential matrix generated 5089 5090 Level: developer 5091 5092 @*/ 5093 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5094 { 5095 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5096 PetscErrorCode ierr; 5097 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5098 IS isrowb,iscolb; 5099 Mat *bseq=PETSC_NULL; 5100 5101 PetscFunctionBegin; 5102 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5103 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5104 } 5105 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5106 5107 if (scall == MAT_INITIAL_MATRIX){ 5108 start = A->cmap->rstart; 5109 cmap = a->garray; 5110 nzA = a->A->cmap->n; 5111 nzB = a->B->cmap->n; 5112 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5113 ncols = 0; 5114 for (i=0; i<nzB; i++) { /* row < local row index */ 5115 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5116 else break; 5117 } 5118 imark = i; 5119 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5120 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5121 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5122 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5123 } else { 5124 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5125 isrowb = *rowb; iscolb = *colb; 5126 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5127 bseq[0] = *B_seq; 5128 } 5129 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5130 *B_seq = bseq[0]; 5131 ierr = PetscFree(bseq);CHKERRQ(ierr); 5132 if (!rowb){ 5133 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5134 } else { 5135 *rowb = isrowb; 5136 } 5137 if (!colb){ 5138 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5139 } else { 5140 *colb = iscolb; 5141 } 5142 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5143 PetscFunctionReturn(0); 5144 } 5145 5146 #undef __FUNCT__ 5147 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5148 /* 5149 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5150 of the OFF-DIAGONAL portion of local A 5151 5152 Collective on Mat 5153 5154 Input Parameters: 5155 + A,B - the matrices in mpiaij format 5156 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5157 5158 Output Parameter: 5159 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5160 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5161 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5162 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5163 5164 Level: developer 5165 5166 */ 5167 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5168 { 5169 VecScatter_MPI_General *gen_to,*gen_from; 5170 PetscErrorCode ierr; 5171 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5172 Mat_SeqAIJ *b_oth; 5173 VecScatter ctx=a->Mvctx; 5174 MPI_Comm comm=((PetscObject)ctx)->comm; 5175 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5176 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5177 PetscScalar *rvalues,*svalues; 5178 MatScalar *b_otha,*bufa,*bufA; 5179 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5180 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5181 MPI_Status *sstatus,rstatus; 5182 PetscMPIInt jj; 5183 PetscInt *cols,sbs,rbs; 5184 PetscScalar *vals; 5185 5186 PetscFunctionBegin; 5187 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5188 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5189 } 5190 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5191 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5192 5193 gen_to = (VecScatter_MPI_General*)ctx->todata; 5194 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5195 rvalues = gen_from->values; /* holds the length of receiving row */ 5196 svalues = gen_to->values; /* holds the length of sending row */ 5197 nrecvs = gen_from->n; 5198 nsends = gen_to->n; 5199 5200 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5201 srow = gen_to->indices; /* local row index to be sent */ 5202 sstarts = gen_to->starts; 5203 sprocs = gen_to->procs; 5204 sstatus = gen_to->sstatus; 5205 sbs = gen_to->bs; 5206 rstarts = gen_from->starts; 5207 rprocs = gen_from->procs; 5208 rbs = gen_from->bs; 5209 5210 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5211 if (scall == MAT_INITIAL_MATRIX){ 5212 /* i-array */ 5213 /*---------*/ 5214 /* post receives */ 5215 for (i=0; i<nrecvs; i++){ 5216 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5217 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5218 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5219 } 5220 5221 /* pack the outgoing message */ 5222 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5223 sstartsj[0] = 0; rstartsj[0] = 0; 5224 len = 0; /* total length of j or a array to be sent */ 5225 k = 0; 5226 for (i=0; i<nsends; i++){ 5227 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5228 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5229 for (j=0; j<nrows; j++) { 5230 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5231 for (l=0; l<sbs; l++){ 5232 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5233 rowlen[j*sbs+l] = ncols; 5234 len += ncols; 5235 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5236 } 5237 k++; 5238 } 5239 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5240 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5241 } 5242 /* 
recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5335 } 5336 } 5337 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5338 } 5339 /* recvs and sends of a-array are completed */ 5340 i = nrecvs; 5341 while (i--) { 5342 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5343 } 5344 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5345 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5346 5347 if (scall == MAT_INITIAL_MATRIX){ 5348 /* put together the new matrix */ 5349 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5350 5351 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5352 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5353 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5354 b_oth->free_a = PETSC_TRUE; 5355 b_oth->free_ij = PETSC_TRUE; 5356 b_oth->nonew = 0; 5357 5358 ierr = PetscFree(bufj);CHKERRQ(ierr); 5359 if (!startsj_s || !bufa_ptr){ 5360 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5361 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5362 } else { 5363 *startsj_s = sstartsj; 5364 *startsj_r = rstartsj; 5365 *bufa_ptr = bufa; 5366 } 5367 } 5368 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5369 PetscFunctionReturn(0); 5370 } 5371 5372 #undef __FUNCT__ 5373 #define __FUNCT__ "MatGetCommunicationStructs" 5374 /*@C 5375 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5376 5377 Not Collective 5378 5379 Input Parameters: 5380 . A - The matrix in mpiaij format 5381 5382 Output Parameter: 5383 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5384 . 
colmap - A map from global column index to local index into lvec 5385 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5386 5387 Level: developer 5388 5389 @*/ 5390 #if defined (PETSC_USE_CTABLE) 5391 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5392 #else 5393 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5394 #endif 5395 { 5396 Mat_MPIAIJ *a; 5397 5398 PetscFunctionBegin; 5399 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5400 PetscValidPointer(lvec, 2); 5401 PetscValidPointer(colmap, 3); 5402 PetscValidPointer(multScatter, 4); 5403 a = (Mat_MPIAIJ *) A->data; 5404 if (lvec) *lvec = a->lvec; 5405 if (colmap) *colmap = a->colmap; 5406 if (multScatter) *multScatter = a->Mvctx; 5407 PetscFunctionReturn(0); 5408 } 5409 5410 EXTERN_C_BEGIN 5411 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5412 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5413 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5414 EXTERN_C_END 5415 5416 #undef __FUNCT__ 5417 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5418 /* 5419 Computes (B'*A')' since computing B*A directly is untenable 5420 5421 n p p 5422 ( ) ( ) ( ) 5423 m ( A ) * n ( B ) = m ( C ) 5424 ( ) ( ) ( ) 5425 5426 */ 5427 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5428 { 5429 PetscErrorCode ierr; 5430 Mat At,Bt,Ct; 5431 5432 PetscFunctionBegin; 5433 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5434 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5435 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5436 ierr = MatDestroy(&At);CHKERRQ(ierr); 5437 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5438 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5439 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5440 PetscFunctionReturn(0); 5441 } 5442 5443 #undef __FUNCT__ 5444 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5445 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5446 { 5447 PetscErrorCode ierr; 5448 PetscInt m=A->rmap->n,n=B->cmap->n; 5449 Mat Cmat; 5450 5451 PetscFunctionBegin; 5452 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5453 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5454 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5455 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5456 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5457 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5458 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5459 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5460 *C = Cmat; 5461 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5462 PetscFunctionReturn(0); 5463 } 5464 5465 /* ----------------------------------------------------------------*/ 5466 #undef __FUNCT__ 5467 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5468 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5469 { 5470 PetscErrorCode ierr; 5471 5472 PetscFunctionBegin; 5473 if (scall == MAT_INITIAL_MATRIX){ 5474 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5475 } 5476 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
5477 PetscFunctionReturn(0); 5478 } 5479 5480 EXTERN_C_BEGIN 5481 #if defined(PETSC_HAVE_MUMPS) 5482 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5483 #endif 5484 #if defined(PETSC_HAVE_PASTIX) 5485 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5486 #endif 5487 #if defined(PETSC_HAVE_SUPERLU_DIST) 5488 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5489 #endif 5490 #if defined(PETSC_HAVE_SPOOLES) 5491 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5492 #endif 5493 EXTERN_C_END 5494 5495 /*MC 5496 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5497 5498 Options Database Keys: 5499 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5500 5501 Level: beginner 5502 5503 .seealso: MatCreateAIJ() 5504 M*/ 5505 5506 EXTERN_C_BEGIN 5507 #undef __FUNCT__ 5508 #define __FUNCT__ "MatCreate_MPIAIJ" 5509 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5510 { 5511 Mat_MPIAIJ *b; 5512 PetscErrorCode ierr; 5513 PetscMPIInt size; 5514 5515 PetscFunctionBegin; 5516 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5517 5518 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5519 B->data = (void*)b; 5520 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5521 B->assembled = PETSC_FALSE; 5522 5523 B->insertmode = NOT_SET_VALUES; 5524 b->size = size; 5525 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5526 5527 /* build cache for off array entries formed */ 5528 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5529 b->donotstash = PETSC_FALSE; 5530 b->colmap = 0; 5531 b->garray = 0; 5532 b->roworiented = PETSC_TRUE; 5533 5534 /* stuff used for matrix vector multiply */ 5535 b->lvec = PETSC_NULL; 5536 b->Mvctx = PETSC_NULL; 5537 5538 /* stuff for MatGetRow() */ 5539 b->rowindices = 0; 5540 b->rowvalues = 0; 5541 b->getrowactive = PETSC_FALSE; 5542 5543 #if defined(PETSC_HAVE_SPOOLES) 5544 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5545 "MatGetFactor_mpiaij_spooles", 5546 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5547 #endif 5548 #if defined(PETSC_HAVE_MUMPS) 5549 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5550 "MatGetFactor_aij_mumps", 5551 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5552 #endif 5553 #if defined(PETSC_HAVE_PASTIX) 5554 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5555 "MatGetFactor_mpiaij_pastix", 5556 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5557 #endif 5558 #if defined(PETSC_HAVE_SUPERLU_DIST) 5559 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5560 "MatGetFactor_mpiaij_superlu_dist", 5561 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5562 #endif 5563 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5564 "MatStoreValues_MPIAIJ", 5565 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5566 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5567 "MatRetrieveValues_MPIAIJ", 5568 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5569 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5570 "MatGetDiagonalBlock_MPIAIJ", 5571 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5572 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5573 "MatIsTranspose_MPIAIJ", 5574 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5576 "MatMPIAIJSetPreallocation_MPIAIJ", 5577 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5578 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5579 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5580 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5581 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5582 "MatDiagonalScaleLocal_MPIAIJ", 5583 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5585 "MatConvert_MPIAIJ_MPIAIJPERM", 5586 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5588 "MatConvert_MPIAIJ_MPIAIJCRL", 5589 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5590 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5591 "MatConvert_MPIAIJ_MPISBAIJ", 5592 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5593 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5594 "MatMatMult_MPIDense_MPIAIJ", 5595 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5597 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5598 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5600 "MatMatMultNumeric_MPIDense_MPIAIJ", 5601 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5603 PetscFunctionReturn(0); 5604 } 5605 EXTERN_C_END 5606 5607 #undef __FUNCT__ 5608 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5609 /*@ 5610 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5611 and "off-diagonal" part of the matrix in CSR format. 5612 5613 Collective on MPI_Comm 5614 5615 Input Parameters: 5616 + comm - MPI communicator 5617 . m - number of local rows (Cannot be PETSC_DECIDE) 5618 . n - This value should be the same as the local size used in creating the 5619 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5620 calculated if N is given) For square matrices n is almost always m. 5621 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5622 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5623 . i - row indices for "diagonal" portion of matrix 5624 . j - column indices 5625 . a - matrix values 5626 . oi - row indices for "off-diagonal" portion of matrix 5627 . oj - column indices 5628 - oa - matrix values 5629 5630 Output Parameter: 5631 . mat - the matrix 5632 5633 Level: advanced 5634 5635 Notes: 5636 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5637 must free the arrays once the matrix has been destroyed and not before. 5638 5639 The i and j indices are 0 based 5640 5641 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5642 5643 This sets local rows and cannot be used to set off-processor values. 5644 5645 You cannot later use MatSetValues() to change values in this matrix. 
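
   Example usage (an illustrative sketch, not taken from the PETSc examples; the CSR data below is made
   up and shows what rank 0 of a two-process run might pass for a 4x4 matrix in which this process owns
   rows 0-1 and columns 0-1; the arrays are not copied, so they must remain valid until C is destroyed):

.vb
   PetscInt    i[]  = {0,2,4};     // "diagonal" block: 2 entries in row 0, 2 in row 1
   PetscInt    j[]  = {0,1,0,1};   // local column indices within the diagonal block
   PetscScalar a[]  = {2,-1,-1,2};
   PetscInt    oi[] = {0,0,1};     // "off-diagonal" block: 0 entries in row 0, 1 in row 1
   PetscInt    oj[] = {2};         // global column indices
   PetscScalar oa[] = {-1};
   Mat         C;

   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&C);CHKERRQ(ierr);
.ve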
5646 5647 .keywords: matrix, aij, compressed row, sparse, parallel 5648 5649 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5650 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5651 @*/ 5652 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5653 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5654 { 5655 PetscErrorCode ierr; 5656 Mat_MPIAIJ *maij; 5657 5658 PetscFunctionBegin; 5659 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5660 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5661 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5662 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5663 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5664 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5665 maij = (Mat_MPIAIJ*) (*mat)->data; 5666 maij->donotstash = PETSC_TRUE; 5667 (*mat)->preallocated = PETSC_TRUE; 5668 5669 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5670 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5671 5672 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5673 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5674 5675 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5676 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5677 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5678 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5679 5680 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5681 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5682 PetscFunctionReturn(0); 5683 } 5684 5685 /* 5686 Special version for direct calls from Fortran 5687 */ 5688 #include <petsc-private/fortranimpl.h> 5689 5690 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5691 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5692 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5693 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5694 #endif 5695 5696 /* Change these macros so can be used in void function */ 5697 #undef CHKERRQ 5698 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5699 #undef SETERRQ2 5700 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5701 #undef SETERRQ3 5702 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5703 #undef SETERRQ 5704 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5705 5706 EXTERN_C_BEGIN 5707 #undef __FUNCT__ 5708 #define __FUNCT__ "matsetvaluesmpiaij_" 5709 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5710 { 5711 Mat mat = *mmat; 5712 PetscInt m = *mm, n = *mn; 5713 InsertMode addv = *maddv; 5714 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5715 PetscScalar value; 5716 PetscErrorCode ierr; 5717 5718 MatCheckPreallocated(mat,1); 5719 if (mat->insertmode == NOT_SET_VALUES) { 5720 mat->insertmode = addv; 5721 } 5722 #if defined(PETSC_USE_DEBUG) 5723 else if (mat->insertmode != addv) { 5724 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5725 } 5726 #endif 5727 { 5728 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5729 PetscInt cstart = 
mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5730 PetscBool roworiented = aij->roworiented; 5731 5732 /* Some Variables required in the macro */ 5733 Mat A = aij->A; 5734 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5735 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5736 MatScalar *aa = a->a; 5737 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5738 Mat B = aij->B; 5739 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5740 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5741 MatScalar *ba = b->a; 5742 5743 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5744 PetscInt nonew = a->nonew; 5745 MatScalar *ap1,*ap2; 5746 5747 PetscFunctionBegin; 5748 for (i=0; i<m; i++) { 5749 if (im[i] < 0) continue; 5750 #if defined(PETSC_USE_DEBUG) 5751 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5752 #endif 5753 if (im[i] >= rstart && im[i] < rend) { 5754 row = im[i] - rstart; 5755 lastcol1 = -1; 5756 rp1 = aj + ai[row]; 5757 ap1 = aa + ai[row]; 5758 rmax1 = aimax[row]; 5759 nrow1 = ailen[row]; 5760 low1 = 0; 5761 high1 = nrow1; 5762 lastcol2 = -1; 5763 rp2 = bj + bi[row]; 5764 ap2 = ba + bi[row]; 5765 rmax2 = bimax[row]; 5766 nrow2 = bilen[row]; 5767 low2 = 0; 5768 high2 = nrow2; 5769 5770 for (j=0; j<n; j++) { 5771 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5772 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5773 if (in[j] >= cstart && in[j] < cend){ 5774 col = in[j] - cstart; 5775 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5776 } else if (in[j] < 0) continue; 5777 #if defined(PETSC_USE_DEBUG) 5778 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5779 #endif 5780 else { 5781 if (mat->was_assembled) { 5782 if (!aij->colmap) { 5783 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5784 } 5785 #if defined (PETSC_USE_CTABLE) 5786 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5787 col--; 5788 #else 5789 col = aij->colmap[in[j]] - 1; 5790 #endif 5791 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5792 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5793 col = in[j]; 5794 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5795 B = aij->B; 5796 b = (Mat_SeqAIJ*)B->data; 5797 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5798 rp2 = bj + bi[row]; 5799 ap2 = ba + bi[row]; 5800 rmax2 = bimax[row]; 5801 nrow2 = bilen[row]; 5802 low2 = 0; 5803 high2 = nrow2; 5804 bm = aij->B->rmap->n; 5805 ba = b->a; 5806 } 5807 } else col = in[j]; 5808 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5809 } 5810 } 5811 } else { 5812 if (!aij->donotstash) { 5813 if (roworiented) { 5814 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5815 } else { 5816 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5817 } 5818 } 5819 } 5820 }} 5821 PetscFunctionReturnVoid(); 5822 } 5823 EXTERN_C_END 5824 5825
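
/*
    Illustrative usage sketch for MatCreateMPIAIJSumSeqAIJ() (not part of the library source; it assumes
    each process has already assembled a MATSEQAIJ matrix seqmat of identical global dimensions, as its
    manual page earlier in this file requires):

       Mat seqmat,mpimat;
       ...  build seqmat as a MATSEQAIJ on each process
       ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
       ...  update the numerical values of seqmat, keeping its nonzero pattern
       ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
       ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/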